In this script, I load exchange data from DATRAS and extract the CA data (i.e. biological data), which is used for the condition model. In theory I could have joined the CPUE data to the CA data and obtained all important covariates and variables (oxygen, depth, ICES information, densities of cod and flounder). However, the CPUE data has been standardized with respect to trawl speed, gear dimension, sweep length and trawl duration. Because many haul IDs did not have this information, the CPUE data has fewer IDs. In order not to lose condition data simply because I lack haul-level CPUE of cod and flounder, I will instead repeat the data-cleaning process here, fit models to cod and flounder CPUE, and predict at the locations of the condition data.
# Package setup ----
# NOTE(review): removed the original `rm(list = ls())` -- wiping the global
# environment inside a script is unsafe for anyone who sources it.
library(tidyverse); theme_set(theme_light(base_size = 12))
library(readxl)
library(tidylog)
library(RCurl)
library(viridis)
library(RColorBrewer)
library(patchwork) # (was loaded twice in the original; once is enough)
library(janitor)
library(icesDatras)
library(mapdata)
library(rgdal)
library(raster)
library(sf)
library(rgeos)
library(chron)
library(lattice)
library(ncdf4)
# Install sdmTMB from GitHub only when it is missing, instead of
# re-installing it on every run of the script
if (!requireNamespace("sdmTMB", quietly = TRUE)) {
  remotes::install_github("pbs-assess/sdmTMB")
}
library(sdmTMB)
library(marmap)
library(rnaturalearth)
library(rnaturalearthdata)
library(mapplots)
library(geosphere)
# Base map ----
# Uncropped world polygons, used later for the per-year diagnostic maps
world <- ne_countries(scale = "medium", returnclass = "sf")
# Specify map ranges (WGS84 lon/lat) used for cropping and plotting
# (use `<-` for assignment, not `=`)
ymin <- 54
ymax <- 58
xmin <- 12
xmax <- 22
map_data <- rnaturalearth::ne_countries(
  scale = "medium",
  returnclass = "sf", continent = "europe")
# Crop the polygon for plotting and efficiency:
# st_bbox(map_data) # find the rough coordinates
swe_coast <- suppressWarnings(suppressMessages(
  st_crop(map_data,
          c(xmin = xmin, ymin = ymin, xmax = xmax, ymax = ymax))))
# Transform our map into UTM zone 33 (EPSG:32633) coordinates, the
# equal-area projection we fit models in
utm_zone33 <- 32633
swe_coast_proj <- sf::st_transform(swe_coast, crs = utm_zone33)
#ggplot(swe_coast_proj) + geom_sf()
# Read HH (haul) data -- DATRAS download commented out; read the cached csv
# bits_hh <- getDATRAS(record = "HH", survey = "BITS", years = 1991:2020, quarters = 4)
# write.csv(bits_hh, "data/DATRAS_exchange/bits_hh.csv")
bits_hh <- read.csv("data/DATRAS_exchange/bits_hh.csv")
# Read CA (biological/age) data -- same pattern as above
# bits_ca <- getDATRAS(record = "CA", survey = "BITS", years = 1991:2020, quarters = 4)
# write.csv(bits_ca, "data/DATRAS_exchange/bits_ca.csv")
bits_ca <- read.csv("data/DATRAS_exchange/bits_ca.csv")
# Before creating a new ID, make sure that countries and ship names use the same format
sort(unique(bits_hh$Ship))
#> [1] "06S1" "06SL" "26D4" "26HF" "26HI" "67BC" "77AR" "77SE" "AA36" "ESLF"
#> [11] "ESTM" "LTDA" "RUJB" "RUNT"
# Change back to the old Ship name standard...
# https://vocab.ices.dk/?ref=315
# Assumptions:
# SOL is Solea on the ICES link above, and SOL1 is the older one of the two SOLs (1 and 2)
# DAN is Dana
# Exploratory checks used to pin down the ship codes (kept for provenance):
# sweep %>% filter(Ship == "DANS") %>% distinct(Year, Country)
# sweep %>% filter(Ship == "DAN2") %>% distinct(Year)
# bits_hh %>% filter(Ship == "67BC") %>% distinct(Year, Country)
# sweep %>% filter(Ship == "DAN2") %>% distinct(Year)
# bits_hh %>% filter(Ship == "26D4") %>% distinct(Year) # Strange that 26DF doesn't extend far back. Which ship did the Danes use? Ok, I have no Danish data that old.
# bits_hh %>% filter(Country == "DK") %>% distinct(Year)
# Recode ship codes in the haul data back to the old DATRAS standard.
# NOTE(review): the original listed "67BC" twice ("HAF" = "67BC" followed by
# "BAL" = "67BC"). With a duplicated old level the LAST mapping wins inside
# fct_recode(), so 67BC was effectively recoded to "BAL" and the "HAF" entry
# was dead code -- it is removed here (behaviour unchanged).
# TODO(review): confirm "BAL" is the intended vessel for 67BC.
bits_hh <- bits_hh %>%
  mutate(Ship2 = fct_recode(Ship,
                            "SOL" = "06S1",
                            "SOL2" = "06SL",
                            "DAN2" = "26D4",
                            "HAF" = "26HF",
                            "HAF" = "26HI",
                            "BAL" = "67BC",
                            "ARG" = "77AR",
                            "77SE" = "77SE",
                            "AA36" = "AA36",
                            "KOOT" = "ESLF",
                            "KOH" = "ESTM",
                            "DAR" = "LTDA",
                            "ATLD" = "RUJB",
                            "ATL" = "RUNT"),
         Ship2 = as.character(Ship2)) %>%
  # The Latvian "BAL" is a different vessel: give it its own code "BALL"
  mutate(Ship3 = ifelse(Country == "LV" & Ship2 == "BAL", "BALL", Ship2))
# Same ship recode for the biological (CA) data, so the two tables agree.
# NOTE(review): as in the haul data, the dead duplicate "HAF" = "67BC" entry
# is removed; fct_recode() already applied the later "BAL" = "67BC" mapping,
# so behaviour is unchanged.
bits_ca <- bits_ca %>%
  mutate(Ship2 = fct_recode(Ship,
                            "SOL" = "06S1",
                            "SOL2" = "06SL",
                            "DAN2" = "26D4",
                            "HAF" = "26HF",
                            "HAF" = "26HI",
                            "BAL" = "67BC",
                            "ARG" = "77AR",
                            "77SE" = "77SE",
                            "AA36" = "AA36",
                            "KOOT" = "ESLF",
                            "KOH" = "ESTM",
                            "DAR" = "LTDA",
                            "ATLD" = "RUJB",
                            "ATL" = "RUNT"),
         Ship2 = as.character(Ship2)) %>%
  # The Latvian "BAL" is a different vessel: give it its own code "BALL"
  mutate(Ship3 = ifelse(Country == "LV" & Ship2 == "BAL", "BALL", Ship2))
# Now check which country codes are used
sort(unique(bits_hh$Country))
#> [1] "DE" "DK" "EE" "LT" "LV" "PL" "RU" "SE"
# Recode two-letter ISO country codes to the older DATRAS country codes.
# https://www.nationsonline.org/oneworld/country_code_list.htm#E
# A single shared lookup (new = old) keeps the two recodes below in sync.
country_lookup <- c("DEN" = "DK",
                    "EST" = "EE",
                    "GFR" = "DE",
                    "LAT" = "LV",
                    "LTU" = "LT",
                    "POL" = "PL",
                    "RUS" = "RU",
                    "SWE" = "SE")
bits_hh <- bits_hh %>%
  mutate(Country = as.character(fct_recode(Country, !!!country_lookup)))
bits_ca <- bits_ca %>%
  mutate(Country = as.character(fct_recode(Country, !!!country_lookup)))
# Gear? Are they the same? (inspect the gear codes present in the haul data)
sort(unique(bits_hh$Gear))
#> [1] "ESB" "EXP" "FOT" "GOV" "GRT" "H20" "LBT" "PEL" "SON" "TVL" "TVS"
# Create an ID column shared by the CA and HH data so they can be joined.
# Note it uses the raw Ship code (not Ship2/Ship3): both tables contain the
# same raw codes, so the join key is consistent.
bits_ca <- bits_ca %>%
mutate(IDx = paste(Year, Quarter, Country, Ship, Gear, StNo, HaulNo, sep = "."))
bits_hh <- bits_hh %>%
mutate(IDx = paste(Year, Quarter, Country, Ship, Gear, StNo, HaulNo, sep = "."))
# Works like a haul-id (one row per IDx in the HH data):
# bits_hh %>% group_by(IDx) %>% mutate(n = n()) %>% ungroup() %>% distinct(n)
# A second, colon-separated haul id using the harmonised ship code (Ship3)
bits_hh <- bits_hh %>%
mutate(haul.id = paste(Year, Quarter, Country, Ship3, Gear, StNo, HaulNo, sep = ":"))
# Select just valid, additional and no-oxygen hauls
# (assumed meanings of HaulVal "V"/"A"/"N" -- TODO confirm against ICES vocab)
bits_hh <- bits_hh %>%
filter(HaulVal %in% c("A","N","V"))
# Add ICES rectangle derived from the shoot position
bits_hh$Rect <- mapplots::ices.rect2(lon = bits_hh$ShootLong, lat = bits_hh$ShootLat)
# Add ICES subdivisions by point-in-polygon overlay on the ICES statrec shapefile.
# NOTE(review): raster::shapefile(), sp::SpatialPoints() and sp::over() depend
# on the retired rgdal/sp stack; consider migrating to sf (st_read/st_join).
shape <- shapefile("data/ICES_StatRec_mapto_ICES_Areas/StatRec_map_Areas_Full_20170124.shp")
# Points in the same CRS as the shapefile
pts <- SpatialPoints(cbind(bits_hh$ShootLong, bits_hh$ShootLat),
proj4string = CRS(proj4string(shape)))
#> Warning in proj4string(shape): CRS object has comment, which is lost in output
# over() returns, for each point, the attributes of the polygon it falls in;
# Area_27 holds the subdivision label (e.g. "3.d.25")
bits_hh$SD <- over(pts, shape)$Area_27
# Rename subdivisions to the more common names and do some more filtering (by sub div and area)
sort(unique(bits_hh$SD))
#> [1] "3.a.20" "3.a.21" "3.b.23" "3.c.22" "3.d.24" "3.d.25"
#> [7] "3.d.26" "3.d.27" "3.d.28.1" "3.d.28.2" "3.d.29"
# NOTE(review): dropped the original "30" = "3.d.30" mapping -- that level
# never occurs in the data (see printout above) and only produced an
# "Unknown levels in `f`: 3.d.30" warning. Both 28.1 and 28.2 are collapsed
# into "28" on purpose.
bits_hh <- bits_hh %>%
  mutate(SD = factor(SD),
         SD = fct_recode(SD,
                         "20" = "3.a.20",
                         "21" = "3.a.21",
                         "22" = "3.c.22",
                         "23" = "3.b.23",
                         "24" = "3.d.24",
                         "25" = "3.d.25",
                         "26" = "3.d.26",
                         "27" = "3.d.27",
                         "28" = "3.d.28.1",
                         "28" = "3.d.28.2",
                         "29" = "3.d.29"),
         SD = as.character(SD))
# Match columns from the HH data to the HL and CA data
sort(unique(bits_hh$SD))
#> [1] "20" "21" "22" "23" "24" "25" "26" "27" "28" "29"
sort(colnames(bits_hh))
#> [1] "BotCurDir" "BotCurSpeed" "BotSal"
#> [4] "BotTemp" "Buoyancy" "BySpecRecCode"
#> [7] "CodendMesh" "Country" "DataType"
#> [10] "DateofCalculation" "Day" "DayNight"
#> [13] "Depth" "DepthStratum" "Distance"
#> [16] "DoorSpread" "DoorSurface" "DoorType"
#> [19] "DoorWgt" "Gear" "GearEx"
#> [22] "GroundSpeed" "haul.id" "HaulDur"
#> [25] "HaulLat" "HaulLong" "HaulNo"
#> [28] "HaulVal" "HydroStNo" "IDx"
#> [31] "KiteDim" "MaxTrawlDepth" "MinTrawlDepth"
#> [34] "Month" "Netopening" "PelSampType"
#> [37] "Quarter" "RecordType" "Rect"
#> [40] "Rigging" "SD" "SecchiDepth"
#> [43] "Ship" "Ship2" "Ship3"
#> [46] "ShootLat" "ShootLong" "SpeedWater"
#> [49] "StatRec" "StdSpecRecCode" "StNo"
#> [52] "SurCurDir" "SurCurSpeed" "SurSal"
#> [55] "SurTemp" "Survey" "SweepLngt"
#> [58] "SwellDir" "SwellHeight" "ThClineDepth"
#> [61] "ThermoCline" "Tickler" "TidePhase"
#> [64] "TideSpeed" "TimeShot" "TowDir"
#> [67] "Turbidity" "WarpDen" "Warpdia"
#> [70] "Warplngt" "WgtGroundRope" "WindDir"
#> [73] "WindSpeed" "WingSpread" "X"
#> [76] "Year"
# Keep only the haul-level columns needed downstream, then join them onto the
# biological (CA) data by the shared haul id IDx
bits_hh_merge <- bits_hh %>%
dplyr::select(SD, Rect, HaulVal, StdSpecRecCode, BySpecRecCode,
IDx, ShootLat, ShootLong)
bits_ca <- left_join(bits_ca, bits_hh_merge, by = "IDx")
# Now filter the subdivisions I want (Baltic SD 24-28) from all data sets
bits_hh <- bits_hh %>% filter(SD %in% c(24, 25, 26, 27, 28))
bits_ca <- bits_ca %>% filter(SD %in% c(24, 25, 26, 27, 28))
# Keep cod only: the two SpecCode values appear to be the ITIS TSN (164712)
# and WoRMS AphiaID (126436) for Gadus morhua -- both code systems occur in
# DATRAS. Also drop 2020 (incomplete year). TODO(review): confirm the codes.
bits_ca <- bits_ca %>%
  filter(SpecCode %in% c("164712", "126436") & Year < 2020) %>%
  mutate(Species = "Cod")
# Now I need to copy rows with CANoAtLngt > 1 so that 1 row = 1 ind
# First make a small test
# nrow(bits_ca)
# test_id <- head(filter(bits_ca, CANoAtLngt == 5))$ID[1]
# filter(bits_ca, ID == test_id & CANoAtLngt == 5)
# tidyr::uncount() repeats each row CANoAtLngt times; .remove = FALSE keeps
# the CANoAtLngt column, matching the behaviour of the original
# map_df(., rep, .$CANoAtLngt) column-by-column trick (which map_df supersedes;
# the only difference is that uncount() keeps the input's class rather than
# coercing to a tibble -- downstream code is unaffected).
bits_ca <- bits_ca %>% uncount(CANoAtLngt, .remove = FALSE)
# head(data.frame(filter(bits_ca, ID == test_id & CANoAtLngt == 5)), 20)
# nrow(bits_ca)
# Looks ok!
# Standardize length and drop NA weights (need that for condition)
bits_ca <- bits_ca %>%
drop_na(IndWgt) %>%
drop_na(LngtClass) %>%
filter(IndWgt > 0 & LngtClass > 0) %>% # Filter positive length and weight
mutate(weight_g = IndWgt) %>%
mutate(length_cm = ifelse(LngtCode == ".",
LngtClass/10,
LngtClass)) # Standardize length (https://vocab.ices.dk/?ref=18)
# NOTE(review): only LngtCode "." (mm reporting) is rescaled; any other codes
# are assumed to already be in cm -- confirm which codes occur in the data.
# Plot weight against length per year to eyeball outliers
ggplot(bits_ca, aes(IndWgt, length_cm)) +
geom_point() +
facet_wrap(~Year)
# Remove apparent outlier (a single implausible > 12.5 kg record in 2010)
bits_ca <- bits_ca %>%
mutate(keep = ifelse(Year == 2010 & IndWgt > 12500, "N", "Y")) %>%
filter(keep == "Y") %>% dplyr::select(-keep)
ggplot(bits_ca, aes(IndWgt, length_cm)) +
geom_point() +
facet_wrap(~Year)
# Rename to the analysis column names and keep only what the model needs
dat <- bits_ca %>% rename("year" = "Year",
                          "lat" = "ShootLat",
                          "lon" = "ShootLong",
                          "quarter" = "Quarter",
                          "ices_rect" = "Rect") %>%
  dplyr::select(weight_g, length_cm, year, lat, lon, quarter, IDx, ices_rect, SD)
# > nrow(dat)
# [1] 98863
# Read the bathymetry tifs (western and eastern halves of the study area)
west <- raster("data/depth_geo_tif/D5_2018_rgb-1.tif")
#plot(west)
east <- raster("data/depth_geo_tif/D6_2018_rgb-1.tif")
# plot(east)
dep_rast <- raster::merge(west, east)
# Extract the raster value at each haul position.
# NOTE(review): use column names instead of the original positional
# dat[, 5:4] (5 = lon, 4 = lat), which silently breaks if the select()
# order above ever changes. extract() expects x (lon) first, y (lat) second.
dat$depth <- extract(dep_rast, dat[, c("lon", "lat")])
# Convert to depth (instead of elevation)
ggplot(dat, aes(depth)) + geom_histogram()
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Reflect the raster values around their maximum so that larger = deeper.
# NOTE(review): drop_na(dat) removed no rows (see tidylog output below), so
# this is effectively max(dat$depth); the reflection assumes the tif stores
# elevation-like values with the shallowest point at the maximum -- confirm
# against the tif's metadata.
dat$depth <- (dat$depth - max(drop_na(dat)$depth)) *-1
#> drop_na: no rows removed
ggplot(dat, aes(depth)) + geom_histogram()
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Bottom-oxygen reanalysis (NEMO-Nordic/SCOBI monthly means).
# Downloaded from here: https://resources.marine.copernicus.eu/?option=com_csw&view=details&product_id=BALTICSEA_REANALYSIS_BIO_003_012
# Raster-extraction guides followed below:
# Extract raster points: https://gisday.wordpress.com/2014/03/24/extract-raster-values-from-points-using-r/comment-page-1/
# https://rpubs.com/boyerag/297592
# https://pjbartlein.github.io/REarthSysSci/netCDF.html#get-a-variable
# Open the netCDF file (variable o2b = sea-floor dissolved oxygen, mmol m-3)
ncin <- nc_open("data/NEMO_Nordic_SCOBI/dataset-reanalysis-scobi-monthlymeans_1610091357600.nc")
print(ncin)
#> File data/NEMO_Nordic_SCOBI/dataset-reanalysis-scobi-monthlymeans_1610091357600.nc (NC_FORMAT_CLASSIC):
#>
#> 1 variables (excluding dimension variables):
#> float o2b[longitude,latitude,time]
#> long_name: Sea_floor_Dissolved_Oxygen_Concentration
#> missing_value: NaN
#> standard_name: mole_concentration_of_dissolved_molecular_oxygen_in_sea_water
#> units: mmol m-3
#> _FillValue: NaN
#> _ChunkSizes: 1
#> _ChunkSizes: 523
#> _ChunkSizes: 383
#>
#> 3 dimensions:
#> time Size:324
#> axis: T
#> long_name: Validity time
#> standard_name: time
#> units: days since 1950-01-01 00:00:00
#> calendar: gregorian
#> _ChunkSizes: 512
#> _CoordinateAxisType: Time
#> valid_min: 15721.5
#> valid_max: 25551.5
#> latitude Size:166
#> axis: Y
#> standard_name: latitude
#> long_name: latitude
#> units: degrees_north
#> _CoordinateAxisType: Lat
#> valid_min: 52.991626739502
#> valid_max: 58.4915390014648
#> longitude Size:253
#> standard_name: longitude
#> long_name: longitude
#> units: degrees_east
#> axis: X
#> _CoordinateAxisType: Lon
#> valid_min: 9.01375484466553
#> valid_max: 23.013614654541
#>
#> 24 global attributes:
#> references: http://www.smhi.se
#> institution: Swedish Meterological and Hydrological Institute
#> history: See source and creation_date attributees
#> Conventions: CF-1.5
#> contact: servicedesk_cmems@mercator-ocean.eu
#> comment: Provided by SMHI as a Copernicus Marine Environment Monitoring Service production unit
#> bullentin_type: reanalysis
#> cmems_product_id: BALTICSEA_REANALYSIS_BIO_003_012
#> title: CMEMS V4 Reanalysis: SCOBI model 3D fields (monthly means)
#> FROM_ORIGINAL_FILE__easternmost_longitude: 30.2357654571533
#> FROM_ORIGINAL_FILE__northernmost_latitude: 65.8914184570312
#> FROM_ORIGINAL_FILE__westernmost_longitude: 9.01375484466553
#> FROM_ORIGINAL_FILE__southernmost_latitude: 48.49169921875
#> shallowest_depth: 1.50136542320251
#> deepest_depth: 711.059204101562
#> source: SMHI reanalysis run NORDIC-NS2_1d_20191201_20191202
#> file_quality_index: 1
#> creation_date: 2020-11-16 UTC
#> bullentin_date: 20191201
#> start_date: 2019-12-01 UTC
#> stop_date: 2019-12-01 UTC
#> start_time: 00:00 UTC
#> stop_time: 00:00 UTC
#> _CoordSysBuilder: ucar.nc2.dataset.conv.CF1Convention
# Get longitude and latitude vectors of the reanalysis grid
lon <- ncvar_get(ncin,"longitude")
nlon <- dim(lon)
head(lon)
#> [1] 9.013755 9.069310 9.124865 9.180420 9.235975 9.291530
lat <- ncvar_get(ncin,"latitude")
nlat <- dim(lat)
head(lat)
#> [1] 52.99163 53.02496 53.05829 53.09163 53.12496 53.15829
# Get time (per the file metadata: days since 1950-01-01; 324 monthly means)
time <- ncvar_get(ncin,"time")
time
#> [1] 15721.5 15751.0 15780.5 15811.0 15841.5 15872.0 15902.5 15933.5 15964.0
#> [10] 15994.5 16025.0 16055.5 16086.5 16116.0 16145.5 16176.0 16206.5 16237.0
#> [19] 16267.5 16298.5 16329.0 16359.5 16390.0 16420.5 16451.5 16481.0 16510.5
#> [28] 16541.0 16571.5 16602.0 16632.5 16663.5 16694.0 16724.5 16755.0 16785.5
#> [37] 16816.5 16846.5 16876.5 16907.0 16937.5 16968.0 16998.5 17029.5 17060.0
#> [46] 17090.5 17121.0 17151.5 17182.5 17212.0 17241.5 17272.0 17302.5 17333.0
#> [55] 17363.5 17394.5 17425.0 17455.5 17486.0 17516.5 17547.5 17577.0 17606.5
#> [64] 17637.0 17667.5 17698.0 17728.5 17759.5 17790.0 17820.5 17851.0 17881.5
#> [73] 17912.5 17942.0 17971.5 18002.0 18032.5 18063.0 18093.5 18124.5 18155.0
#> [82] 18185.5 18216.0 18246.5 18277.5 18307.5 18337.5 18368.0 18398.5 18429.0
#> [91] 18459.5 18490.5 18521.0 18551.5 18582.0 18612.5 18643.5 18673.0 18702.5
#> [100] 18733.0 18763.5 18794.0 18824.5 18855.5 18886.0 18916.5 18947.0 18977.5
#> [109] 19008.5 19038.0 19067.5 19098.0 19128.5 19159.0 19189.5 19220.5 19251.0
#> [118] 19281.5 19312.0 19342.5 19373.5 19403.0 19432.5 19463.0 19493.5 19524.0
#> [127] 19554.5 19585.5 19616.0 19646.5 19677.0 19707.5 19738.5 19768.5 19798.5
#> [136] 19829.0 19859.5 19890.0 19920.5 19951.5 19982.0 20012.5 20043.0 20073.5
#> [145] 20104.5 20134.0 20163.5 20194.0 20224.5 20255.0 20285.5 20316.5 20347.0
#> [154] 20377.5 20408.0 20438.5 20469.5 20499.0 20528.5 20559.0 20589.5 20620.0
#> [163] 20650.5 20681.5 20712.0 20742.5 20773.0 20803.5 20834.5 20864.0 20893.5
#> [172] 20924.0 20954.5 20985.0 21015.5 21046.5 21077.0 21107.5 21138.0 21168.5
#> [181] 21199.5 21229.5 21259.5 21290.0 21320.5 21351.0 21381.5 21412.5 21443.0
#> [190] 21473.5 21504.0 21534.5 21565.5 21595.0 21624.5 21655.0 21685.5 21716.0
#> [199] 21746.5 21777.5 21808.0 21838.5 21869.0 21899.5 21930.5 21960.0 21989.5
#> [208] 22020.0 22050.5 22081.0 22111.5 22142.5 22173.0 22203.5 22234.0 22264.5
#> [217] 22295.5 22325.0 22354.5 22385.0 22415.5 22446.0 22476.5 22507.5 22538.0
#> [226] 22568.5 22599.0 22629.5 22660.5 22690.5 22720.5 22751.0 22781.5 22812.0
#> [235] 22842.5 22873.5 22904.0 22934.5 22965.0 22995.5 23026.5 23056.0 23085.5
#> [244] 23116.0 23146.5 23177.0 23207.5 23238.5 23269.0 23299.5 23330.0 23360.5
#> [253] 23391.5 23421.0 23450.5 23481.0 23511.5 23542.0 23572.5 23603.5 23634.0
#> [262] 23664.5 23695.0 23725.5 23756.5 23786.0 23815.5 23846.0 23876.5 23907.0
#> [271] 23937.5 23968.5 23999.0 24029.5 24060.0 24090.5 24121.5 24151.5 24181.5
#> [280] 24212.0 24242.5 24273.0 24303.5 24334.5 24365.0 24395.5 24426.0 24456.5
#> [289] 24487.5 24517.0 24546.5 24577.0 24607.5 24638.0 24668.5 24699.5 24730.0
#> [298] 24760.5 24791.0 24821.5 24852.5 24882.0 24911.5 24942.0 24972.5 25003.0
#> [307] 25033.5 25064.5 25095.0 25125.5 25156.0 25186.5 25217.5 25247.0 25276.5
#> [316] 25307.0 25337.5 25368.0 25398.5 25429.5 25460.0 25490.5 25521.0 25551.5
# Time-axis metadata: units string used to derive the calendar origin below
tunits <- ncatt_get(ncin,"time","units")
nt <- dim(time)
nt
#> [1] 324
tunits
#> $hasatt
#> [1] TRUE
#>
#> $value
#> [1] "days since 1950-01-01 00:00:00"
# Get oxygen (the o2b variable) and its attributes
dname <- "o2b"
oxy_array <- ncvar_get(ncin,dname)
dlname <- ncatt_get(ncin,dname,"long_name")
dunits <- ncatt_get(ncin,dname,"units")
fillvalue <- ncatt_get(ncin,dname,"_FillValue")
dim(oxy_array)
#> [1] 253 166 324
# Get global attributes (kept for provenance; not used further)
title <- ncatt_get(ncin,0,"title")
institution <- ncatt_get(ncin,0,"institution")
datasource <- ncatt_get(ncin,0,"source")
references <- ncatt_get(ncin,0,"references")
history <- ncatt_get(ncin,0,"history")
Conventions <- ncatt_get(ncin,0,"Conventions")
# Convert time: split the time units string ("days since yyyy-mm-dd ...")
# into origin year/month/day fields
tustr <- strsplit(tunits$value, " ")
tdstr <- strsplit(unlist(tustr)[3], "-")
tmonth <- as.integer(unlist(tdstr)[2])
tday <- as.integer(unlist(tdstr)[3])
tyear <- as.integer(unlist(tdstr)[1])
# Here I deviate from the guide a little bit. Save this info:
dates <- chron(time, origin = c(tmonth, tday, tyear))
# Pull month and year out of the printed chron date.
# NOTE(review): relies on chron's default "(mm/dd/yy ..." print format
# (month at characters 2-3, two-digit year at 8-9) -- fragile; would break if
# the chron format option changes.
months <- as.numeric(substr(dates, 2, 3))
years <- as.numeric(substr(dates, 8, 9))
# Two-digit year pivot: > 90 -> 1900s, else 2000s. Fine for the 1993-2019
# span of this file, but note a year of exactly 1990 would be misclassified.
years <- ifelse(years > 90, 1900 + years, 2000 + years)
# Replace netCDF fill values with NA's.
# NOTE(review): _FillValue here is NaN, so `== fillvalue$value` yields NA
# everywhere and this line is effectively a no-op; harmless in practice,
# because is.na(NaN) is TRUE and NaN cells already behave as missing.
oxy_array[oxy_array == fillvalue$value] <- NA
# We only use Quarter 4 in this analysis, so now we want to loop through each time step,
# and if it is a good month save it as a raster.
# First get the index of months that correspond to Q4
months
#> [1] 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1
#> [26] 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2
#> [51] 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3
#> [76] 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4
#> [101] 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5
#> [126] 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6
#> [151] 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7
#> [176] 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8
#> [201] 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9
#> [226] 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10
#> [251] 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11
#> [276] 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12
#> [301] 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12
# Index of months that correspond to Q4 (October-December)
index_keep <- which(months > 9)
# Subset the oxygen array, month vector and year vector to Q4 only
oxy_q4 <- oxy_array[, , index_keep]
months_keep <- months[index_keep]
years_keep <- years[index_keep]
# Now we have an array with only Q4 data...
# We need to now calculate the average within a year.
# Q4 slices come in consecutive triplets (Oct, Nov, Dec), so every third
# index marks the start of a year's triplet.
loop_seq <- seq(1, dim(oxy_q4)[3], by = 3)
# List that will hold one year-averaged oxygen matrix per element
dlist <- list()
# NOTE(review): dropped the original `oxy_10/oxy_11/oxy_12/oxy_ave <- c()`
# pre-assignments -- they were dead code, each being created inside the loop
# below before first use.
# Average the three consecutive Q4 months (Oct, Nov, Dec) into one matrix
# per year; loop_seq holds the first slice index of each year's triplet.
for (start in loop_seq) {
  yearly_mean <- (oxy_q4[, , start] +
                  oxy_q4[, , start + 1] +
                  oxy_q4[, , start + 2]) / 3
  # Map slice index 1, 4, 7, ... onto list position 1, 2, 3, ...
  dlist[[(start - 1) / 3 + 1]] <- yearly_mean
}
# Name the list elements with their year (years_keep repeats each year three
# times; unique() yields one label per averaged matrix, in the same order)
names(dlist) <- unique(years_keep)
# Now I need to make a loop where I extract the raster value for each year...
# The condition data is called dat so far in this script
# Keep only the years for which we have oxygen fields (drops 1991-1992)
d_sub_oxy <- dat %>% filter(year %in% names(dlist)) %>% droplevels()
#> filter: removed 1,305 rows (1%), 97,558 rows remaining
# Holder for the per-year extracted point data
data_list <- list()
# ... And for the per-year oxygen rasters
raster_list <- list()
# Factor year, so dlist can be indexed by level name inside the loop
d_sub_oxy$year_f <- as.factor(d_sub_oxy$year)
# Loop over years: extract the Q4 bottom-oxygen raster value at each
# condition-data point and save a diagnostic map per year.
for(i in unique(d_sub_oxy$year_f)) {
  # Set plot limits (lon/lat)
  ymin <- 54; ymax <- 58; xmin <- 12; xmax <- 22
  # This year's averaged oxygen matrix (dlist is named by year)
  oxy_slice <- dlist[[i]]
  # Create a raster for year i on the reanalysis grid.
  # NOTE(review): fixed the malformed proj4 string -- the original read
  # "+no_defs+ towgs84=0,0,0" (misplaced '+'), which garbled the towgs84 term.
  r <- raster(t(oxy_slice), xmn = min(lon), xmx = max(lon), ymn = min(lat), ymx = max(lat),
              crs = CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs +towgs84=0,0,0"))
  # After transposing, rows run in the wrong latitude direction -> flip on y
  r <- flip(r, direction = 'y')
  #plot(r, main = i)
  # Filter the same year (i) in the condition data and keep only coordinates
  d_slice <- d_sub_oxy %>% filter(year_f == i) %>% dplyr::select(lon, lat)
  # Make into a SpatialPoints object
  data_sp <- SpatialPoints(d_slice)
  # Extract raster value (oxygen) at each point
  rasValue <- raster::extract(r, data_sp)
  # (NOTE(review): removed an unused `df <- as.data.frame(data_sp)` that the
  # original created here.)
  # Attach the extracted oxygen and the year to this year's coordinates
  d_slice$oxy <- rasValue
  d_slice$year <- i
  # List index: years run 1993..., so year - 1992 gives 1..n (the loop index
  # i is a factor level / character and not continuous, so it can't be used)
  index <- as.numeric(d_slice$year)[1] - 1992
  # Store this year's point data
  data_list[[index]] <- d_slice
  # Convert the raster to points so the diagnostic map can be drawn in ggplot
  map.p <- rasterToPoints(r)
  # Make the points a dataframe for ggplot
  df_rast <- data.frame(map.p)
  # Rename the value column and tag the year
  df_rast <- df_rast %>% rename("oxy" = "layer") %>% mutate(year = i)
  # Store this year's raster data frame (before the cosmetic renaming below)
  raster_list[[index]] <- df_rast
  # Plot-friendly column names.
  # NOTE(review): the original assigned only three names to the four columns
  # (x, y, oxy, year), leaving the year column with an NA name; "year" added.
  colnames(df_rast) <- c("Longitude", "Latitude", "oxy", "year")
  # Now make the map: oxygen raster with the condition-data points on top.
  # NOTE(review): assign the plot and pass it to ggsave() explicitly -- inside
  # a loop the plot is never auto-printed, so relying on last_plot() is fragile.
  p <- ggplot(data = df_rast, aes(y = Latitude, x = Longitude)) +
    geom_raster(aes(fill = oxy)) +
    geom_point(data = d_slice, aes(x = lon, y = lat, fill = oxy),
               color = "black", size = 5, shape = 21) +
    theme_bw() +
    geom_sf(data = world, inherit.aes = F, size = 0.2) +
    coord_sf(xlim = c(xmin, xmax),
             ylim = c(ymin, ymax)) +
    scale_colour_gradientn(colours = rev(terrain.colors(10)),
                           limits = c(-200, 400)) +
    scale_fill_gradientn(colours = rev(terrain.colors(10)),
                         limits = c(-200, 400)) +
    NULL
  ggsave(paste("figures/supp/cpue_oxygen_rasters/", i,".png", sep = ""),
         plot = p, width = 6.5, height = 6.5, dpi = 600)
}
#> filter: removed 96,361 rows (99%), 1,197 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 95,731 rows (98%), 1,827 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 95,528 rows (98%), 2,030 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 95,874 rows (98%), 1,684 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 95,471 rows (98%), 2,087 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 95,757 rows (98%), 1,801 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 95,653 rows (98%), 1,905 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 94,414 rows (97%), 3,144 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 95,525 rows (98%), 2,033 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 95,631 rows (98%), 1,927 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 94,149 rows (97%), 3,409 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 92,678 rows (95%), 4,880 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 91,720 rows (94%), 5,838 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 91,896 rows (94%), 5,662 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 90,570 rows (93%), 6,988 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 91,253 rows (94%), 6,305 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 93,495 rows (96%), 4,063 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 92,789 rows (95%), 4,769 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 93,475 rows (96%), 4,083 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 94,686 rows (97%), 2,872 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 94,286 rows (97%), 3,272 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 93,349 rows (96%), 4,209 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 93,298 rows (96%), 4,260 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 91,379 rows (94%), 6,179 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 92,746 rows (95%), 4,812 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 94,270 rows (97%), 3,288 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 94,524 rows (97%), 3,034 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
# Now create single data frames from the lists of annual values
big_dat_oxy <- dplyr::bind_rows(data_list)
big_raster_dat_oxy <- dplyr::bind_rows(raster_list)
# Mean bottom oxygen per year across the raster: big inter-annual variation
# but a negative trend over time
big_raster_dat_oxy %>%
group_by(year) %>%
drop_na(oxy) %>%
summarise(mean_oxy = mean(oxy)) %>%
mutate(year_num = as.numeric(year)) %>%
ggplot(., aes(year_num, mean_oxy)) +
geom_point(size = 2) +
stat_smooth(method = "lm") +
NULL
#> group_by: one grouping variable (year)
#> drop_na (grouped): no rows removed
#> summarise: now 27 rows and 2 columns, ungrouped
#> mutate: new variable 'year_num' (double) with 27 unique values and 0% NA
#> `geom_smooth()` using formula 'y ~ x'
# Count of raster cells with oxy < 0 ("dead", i.e. anoxic bottoms) per year
big_raster_dat_oxy %>%
group_by(year) %>%
drop_na(oxy) %>%
mutate(dead = ifelse(oxy < 0, "Y", "N")) %>%
filter(dead == "Y") %>%
mutate(n = n(),
year_num = as.numeric(year)) %>%
ggplot(., aes(year_num, n)) +
geom_point(size = 2) +
stat_smooth(method = "lm") +
NULL
#> group_by: one grouping variable (year)
#> drop_na (grouped): no rows removed
#> mutate (grouped): new variable 'dead' (character) with 2 unique values and 0% NA
#> filter (grouped): removed 437,238 rows (93%), 31,671 rows remaining
#> mutate (grouped): new variable 'n' (integer) with 27 unique values and 0% NA
#> new variable 'year_num' (double) with 27 unique values and 0% NA
#> `geom_smooth()` using formula 'y ~ x'
# Now add in the new oxygen column in the original data:
str(d_sub_oxy)
#> tibble [97,558 × 11] (S3: tbl_df/tbl/data.frame)
#> $ weight_g : num [1:97558] 1176 1187 1199 1219 1433 ...
#> $ length_cm: num [1:97558] 51 51 51 51 53 55 58 61 8 34 ...
#> $ year : int [1:97558] 1993 1993 1993 1993 1993 1993 1993 1993 1993 1993 ...
#> $ lat : num [1:97558] 54.7 54.7 54.7 54.7 54.7 ...
#> $ lon : num [1:97558] 14.1 14.1 14.1 14.1 14.1 ...
#> $ quarter : int [1:97558] 4 4 4 4 4 4 4 4 4 4 ...
#> $ IDx : chr [1:97558] "1993.4.GFR.06S1.H20.28.41" "1993.4.GFR.06S1.H20.28.41" "1993.4.GFR.06S1.H20.28.41" "1993.4.GFR.06S1.H20.28.41" ...
#> $ ices_rect: chr [1:97558] "38G4" "38G4" "38G4" "38G4" ...
#> $ SD : chr [1:97558] "24" "24" "24" "24" ...
#> $ depth : num [1:97558] 18 18 18 18 18 18 18 18 10 10 ...
#> $ year_f : Factor w/ 27 levels "1993","1994",..: 1 1 1 1 1 1 1 1 1 1 ...
str(big_dat_oxy)
#> tibble [97,558 × 4] (S3: tbl_df/tbl/data.frame)
#> $ lon : num [1:97558] 14.1 14.1 14.1 14.1 14.1 ...
#> $ lat : num [1:97558] 54.7 54.7 54.7 54.7 54.7 ...
#> $ oxy : num [1:97558] 327 327 327 327 327 ...
#> $ year: chr [1:97558] "1993" "1993" "1993" "1993" ...
# Create an ID (year_lon_lat) for matching the oxygen values back onto the
# full condition data.
# NOTE(review): this joins on numeric coordinates serialized to strings --
# exact match only; fine here because both ids are built from the same
# unrounded values.
dat$id_oxy <- paste(dat$year, dat$lon, dat$lat, sep = "_")
big_dat_oxy$id_oxy <- paste(big_dat_oxy$year, big_dat_oxy$lon, big_dat_oxy$lat, sep = "_")
# Which ids in dat have no oxygen match? (Expected: 1991-1992, the years
# without oxygen fields -- confirmed by the listing below)
ids <- dat$id_oxy[!dat$id_oxy %in% c(big_dat_oxy$id_oxy)]
unique(ids)
#> [1] "1991_14.0333_54.85" "1991_14.1167_54.6667" "1991_13.7_54.75"
#> [4] "1991_13.85_54.7833" "1991_13.8_54.65" "1991_13.6667_54.6833"
#> [7] "1991_13.5667_54.7167" "1991_13.45_54.75" "1991_13.8_54.5167"
#> [10] "1991_14.15_54.5167" "1991_13.2435_54.9697" "1991_13.1167_54.95"
#> [13] "1991_13.6833_54.9833" "1991_13.95_54.9333" "1991_14.0833_54.5833"
#> [16] "1991_14.25_54.6" "1991_13.4833_55.0167" "1991_13.6_55.0333"
#> [19] "1991_13.85_55.1" "1991_13.9042_55.0183" "1991_13.2772_55.21"
#> [22] "1991_13.6492_55.2135" "1991_15.6596_55.4523" "1991_15.3273_55.9365"
#> [25] "1991_15.3685_55.95" "1991_15.9797_55.8632" "1991_16.0537_55.7407"
#> [28] "1991_16.431_55.5713" "1991_17.0363_55.9378" "1991_18.4345_56.224"
#> [31] "1991_18.7987_57.1543" "1991_17.668_55.8398" "1991_16.4383_55.6667"
#> [34] "1991_17.7227_56.0398" "1991_14.4605_55.4568" "1991_14.2167_55.15"
#> [37] "1991_14.2833_55.1833" "1991_14.2833_55" "1991_18.9297_57.112"
#> [40] "1991_18.8237_57.0395" "1991_19.1567_57.3528" "1991_17.0417_57.5"
#> [43] "1991_14.488_55.6708" "1991_14.7052_55.574" "1991_17.5455_57.496"
#> [46] "1991_18.1203_57.8185" "1991_19.406_57.8703" "1991_19.5915_57.8896"
#> [49] "1992_14.2833_55.1833" "1992_14.1667_55.1333" "1992_13.6333_55"
#> [52] "1992_13.1167_54.7167" "1992_13.3333_54.7" "1992_13.8_54.5167"
#> [55] "1992_13.55_54.7333" "1992_14.3_55.0333" "1992_13.9_54.5"
#> [58] "1992_13.65_54.6833" "1992_13.8_54.6333" "1992_14.35_55.1"
#> [61] "1992_13.8167_54.75" "1992_13.7_54.75" "1992_13.2_54.8667"
#> [64] "1992_13.1_54.9167" "1992_13.1167_54.9833" "1992_14.1_54.7"
#> [67] "1992_14.0333_54.8667" "1992_14.2_54.5" "1992_14.3667_54.5167"
#> [70] "1992_14.2167_54.6333" "1992_13.9667_54.9333" "1992_13.8833_54.5667"
#> [73] "1992_13.1167_54.65" "1992_14.1333_54.5667" "1992_13.45_55.0167"
#> [76] "1992_13.7667_55.1" "1992_13.55_55.0333" "1992_13.8667_55.1"
# Select only the columns we want to merge
big_dat_sub_oxy <- big_dat_oxy %>% dplyr::select(id_oxy, oxy)
# Remove duplicate IDs (one oxy value per id; duplicates arise because many
# fish share a haul position)
big_dat_sub_oxy2 <- big_dat_sub_oxy %>% distinct(id_oxy, .keep_all = TRUE)
#> distinct: removed 94,317 rows (97%), 3,241 rows remaining
# big_dat_sub_oxy %>% group_by(id_oxy) %>% mutate(n = n()) %>% arrange(desc(n))
# Join the raster-derived oxygen onto the full condition data
dat <- left_join(dat, big_dat_sub_oxy2, by = "id_oxy")
#> left_join: added one column (oxy)
#> > rows only in x 1,305
#> > rows only in y ( 0)
#> > matched rows 97,558
#> > ========
#> > rows total 98,863
# Now the unit of oxygen is mmol/m3. I want it to be ml/L. The original model
# is in ml/L and was converted by the data host. Since it was converted
# without accounting for pressure or temperature, a constant factor suffices:
# 1 ml/l = 10^3/22.391 = 44.661 umol/l -> 1 ml/l = 0.044661 mmol/l
# = 44.661 mmol/m^3 -> 1 mmol/m^3 = 0.0223909 ml/l
# https://ocean.ices.dk/tools/unitconversion.aspx
dat$oxy <- dat$oxy * 0.0223909
# Drop rows without oxygen (hauls outside the raster footprint, plus 1991-1992)
dat <- dat %>% drop_na(oxy)
#> drop_na: removed 1,419 rows (1%), 97,444 rows remaining
# Open the netCDF file with monthly-mean bottom temperature (bottomT) from the
# CMEMS NEMO-Nordic/SCOBI Baltic Sea reanalysis (see printed metadata below)
ncin <- nc_open("data/NEMO_Nordic_SCOBI/dataset-reanalysis-nemo-monthlymeans_1608127623694.nc")
# Print the file structure: variables, dimensions, units, and global attributes
print(ncin)
#> File data/NEMO_Nordic_SCOBI/dataset-reanalysis-nemo-monthlymeans_1608127623694.nc (NC_FORMAT_CLASSIC):
#>
#> 1 variables (excluding dimension variables):
#> float bottomT[longitude,latitude,time]
#> standard_name: sea_water_potential_temperature_at_sea_floor
#> units: degrees_C
#> long_name: Sea floor potential temperature
#> missing_value: NaN
#> _FillValue: NaN
#> _ChunkSizes: 1
#> _ChunkSizes: 523
#> _ChunkSizes: 383
#>
#> 3 dimensions:
#> time Size:324
#> axis: T
#> long_name: Validity time
#> standard_name: time
#> units: days since 1950-01-01 00:00:00
#> calendar: gregorian
#> _ChunkSizes: 512
#> _CoordinateAxisType: Time
#> valid_min: 15721.5
#> valid_max: 25551.5
#> latitude Size:523
#> axis: Y
#> standard_name: latitude
#> long_name: latitude
#> units: degrees_north
#> _CoordinateAxisType: Lat
#> valid_min: 48.49169921875
#> valid_max: 65.8914184570312
#> longitude Size:383
#> standard_name: longitude
#> long_name: longitude
#> units: degrees_east
#> axis: X
#> _CoordinateAxisType: Lon
#> valid_min: 9.01375484466553
#> valid_max: 30.2357654571533
#>
#> 24 global attributes:
#> references: http://www.smhi.se
#> institution: Swedish Meterological and Hydrological Institute
#> history: See source and creation_date attributees
#> Conventions: CF-1.5
#> contact: servicedesk_cmems@mercator-ocean.eu
#> comment: Provided by SMHI as a Copernicus Marine Environment Monitoring Service production unit
#> bullentin_type: reanalysis
#> cmems_product_id: BALTICSEA_REANALYSIS_PHY_003_011
#> title: CMEMS V4 Reanalysis: NEMO model 3D fields (monthly means)
#> FROM_ORIGINAL_FILE__easternmost_longitude: 30.2357654571533
#> FROM_ORIGINAL_FILE__northernmost_latitude: 65.8914184570312
#> FROM_ORIGINAL_FILE__westernmost_longitude: 9.01375484466553
#> FROM_ORIGINAL_FILE__southernmost_latitude: 48.49169921875
#> shallowest_depth: 1.50136542320251
#> deepest_depth: 711.059204101562
#> source: SMHI reanalysis run NORDIC-NS2_1d_20191201_20191202
#> file_quality_index: 1
#> creation_date: 2020-11-16 UTC
#> bullentin_date: 20191201
#> start_date: 2019-12-01 UTC
#> stop_date: 2019-12-01 UTC
#> start_time: 00:00 UTC
#> stop_time: 00:00 UTC
#> _CoordSysBuilder: ucar.nc2.dataset.conv.CF1Convention
# Get longitude and latitude vectors of the reanalysis grid (383 x 523 cells,
# roughly 9-30 degrees E and 48.5-65.9 degrees N per the file metadata above)
lon <- ncvar_get(ncin,"longitude")
nlon <- dim(lon)
head(lon)
#> [1] 9.013755 9.069310 9.124865 9.180420 9.235975 9.291530
lat <- ncvar_get(ncin,"latitude")
nlat <- dim(lat)
head(lat)
#> [1] 48.49170 48.52503 48.55836 48.59170 48.62503 48.65836
# Get time: 324 monthly values in "days since 1950-01-01" (27 years x 12 months)
time <- ncvar_get(ncin,"time")
time
#> [1] 15721.5 15751.0 15780.5 15811.0 15841.5 15872.0 15902.5 15933.5 15964.0
#> [10] 15994.5 16025.0 16055.5 16086.5 16116.0 16145.5 16176.0 16206.5 16237.0
#> [19] 16267.5 16298.5 16329.0 16359.5 16390.0 16420.5 16451.5 16481.0 16510.5
#> [28] 16541.0 16571.5 16602.0 16632.5 16663.5 16694.0 16724.5 16755.0 16785.5
#> [37] 16816.5 16846.5 16876.5 16907.0 16937.5 16968.0 16998.5 17029.5 17060.0
#> [46] 17090.5 17121.0 17151.5 17182.5 17212.0 17241.5 17272.0 17302.5 17333.0
#> [55] 17363.5 17394.5 17425.0 17455.5 17486.0 17516.5 17547.5 17577.0 17606.5
#> [64] 17637.0 17667.5 17698.0 17728.5 17759.5 17790.0 17820.5 17851.0 17881.5
#> [73] 17912.5 17942.0 17971.5 18002.0 18032.5 18063.0 18093.5 18124.5 18155.0
#> [82] 18185.5 18216.0 18246.5 18277.5 18307.5 18337.5 18368.0 18398.5 18429.0
#> [91] 18459.5 18490.5 18521.0 18551.5 18582.0 18612.5 18643.5 18673.0 18702.5
#> [100] 18733.0 18763.5 18794.0 18824.5 18855.5 18886.0 18916.5 18947.0 18977.5
#> [109] 19008.5 19038.0 19067.5 19098.0 19128.5 19159.0 19189.5 19220.5 19251.0
#> [118] 19281.5 19312.0 19342.5 19373.5 19403.0 19432.5 19463.0 19493.5 19524.0
#> [127] 19554.5 19585.5 19616.0 19646.5 19677.0 19707.5 19738.5 19768.5 19798.5
#> [136] 19829.0 19859.5 19890.0 19920.5 19951.5 19982.0 20012.5 20043.0 20073.5
#> [145] 20104.5 20134.0 20163.5 20194.0 20224.5 20255.0 20285.5 20316.5 20347.0
#> [154] 20377.5 20408.0 20438.5 20469.5 20499.0 20528.5 20559.0 20589.5 20620.0
#> [163] 20650.5 20681.5 20712.0 20742.5 20773.0 20803.5 20834.5 20864.0 20893.5
#> [172] 20924.0 20954.5 20985.0 21015.5 21046.5 21077.0 21107.5 21138.0 21168.5
#> [181] 21199.5 21229.5 21259.5 21290.0 21320.5 21351.0 21381.5 21412.5 21443.0
#> [190] 21473.5 21504.0 21534.5 21565.5 21595.0 21624.5 21655.0 21685.5 21716.0
#> [199] 21746.5 21777.5 21808.0 21838.5 21869.0 21899.5 21930.5 21960.0 21989.5
#> [208] 22020.0 22050.5 22081.0 22111.5 22142.5 22173.0 22203.5 22234.0 22264.5
#> [217] 22295.5 22325.0 22354.5 22385.0 22415.5 22446.0 22476.5 22507.5 22538.0
#> [226] 22568.5 22599.0 22629.5 22660.5 22690.5 22720.5 22751.0 22781.5 22812.0
#> [235] 22842.5 22873.5 22904.0 22934.5 22965.0 22995.5 23026.5 23056.0 23085.5
#> [244] 23116.0 23146.5 23177.0 23207.5 23238.5 23269.0 23299.5 23330.0 23360.5
#> [253] 23391.5 23421.0 23450.5 23481.0 23511.5 23542.0 23572.5 23603.5 23634.0
#> [262] 23664.5 23695.0 23725.5 23756.5 23786.0 23815.5 23846.0 23876.5 23907.0
#> [271] 23937.5 23968.5 23999.0 24029.5 24060.0 24090.5 24121.5 24151.5 24181.5
#> [280] 24212.0 24242.5 24273.0 24303.5 24334.5 24365.0 24395.5 24426.0 24456.5
#> [289] 24487.5 24517.0 24546.5 24577.0 24607.5 24638.0 24668.5 24699.5 24730.0
#> [298] 24760.5 24791.0 24821.5 24852.5 24882.0 24911.5 24942.0 24972.5 25003.0
#> [307] 25033.5 25064.5 25095.0 25125.5 25156.0 25186.5 25217.5 25247.0 25276.5
#> [316] 25307.0 25337.5 25368.0 25398.5 25429.5 25460.0 25490.5 25521.0 25551.5
# Read the time unit attribute; its origin date is parsed further below to
# convert the numeric time axis into calendar dates
tunits <- ncatt_get(ncin,"time","units")
nt <- dim(time)
nt
#> [1] 324
tunits
#> $hasatt
#> [1] TRUE
#>
#> $value
#> [1] "days since 1950-01-01 00:00:00"
# Get temperature: read the full bottomT array (lon x lat x time = 383 x 523 x 324)
# plus its descriptive attributes
dname <- "bottomT"
temp_array <- ncvar_get(ncin,dname)
dlname <- ncatt_get(ncin,dname,"long_name")
dunits <- ncatt_get(ncin,dname,"units")
# Declared fill value (NaN in this file; see the replacement step below)
fillvalue <- ncatt_get(ncin,dname,"_FillValue")
dim(temp_array)
#> [1] 383 523 324
# Get global attributes (kept for provenance; not used in the analysis)
title <- ncatt_get(ncin,0,"title")
institution <- ncatt_get(ncin,0,"institution")
datasource <- ncatt_get(ncin,0,"source")
references <- ncatt_get(ncin,0,"references")
history <- ncatt_get(ncin,0,"history")
Conventions <- ncatt_get(ncin,0,"Conventions")
# Convert time: split the time units string ("days since 1950-01-01 00:00:00")
# into fields to recover the calendar origin for chron()
tustr <- strsplit(tunits$value, " ")
tdstr <- strsplit(unlist(tustr)[3], "-")
tmonth <- as.integer(unlist(tdstr)[2])
tday <- as.integer(unlist(tdstr)[3])
tyear <- as.integer(unlist(tdstr)[1])
# Here I deviate from the guide a little bit. Save this info:
dates <- chron(time, origin = c(tmonth, tday, tyear))
# Crop the date variable: chron prints "(mm/dd/yy)", so characters 2-3 hold the
# month and characters 8-9 the two-digit year
months <- as.numeric(substr(dates, 2, 3))
years <- as.numeric(substr(dates, 8, 9))
# Two-digit-year pivot: > 90 means 19xx, otherwise 20xx (data span 1993-2019)
years <- ifelse(years > 90, 1900 + years, 2000 + years)
# Replace netCDF fill values with NA's.
# Bug fix: the declared _FillValue is NaN, and `x == NaN` yields NA (never TRUE),
# so the original equality test silently replaced nothing; test with is.nan().
# (ncvar_get may already have converted fills to NA, making this a safety net.)
temp_array[is.nan(temp_array)] <- NA
# Only Quarter 4 is used in this analysis, so isolate the array slices that
# fall in October-December before averaging them per year below.
# Inspect the month sequence first:
months
#> [1] 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1
#> [26] 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2
#> [51] 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3
#> [76] 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4
#> [101] 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5
#> [126] 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6
#> [151] 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7
#> [176] 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8
#> [201] 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9
#> [226] 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10
#> [251] 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11
#> [276] 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12
#> [301] 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12
# months only takes values 1-12, so Q4 is exactly the months 10, 11 and 12
index_keep <- which(months %in% 10:12)
# Subset the temperature array and the matching month/year keys to Q4 only
temp_q4 <- temp_array[, , index_keep]
months_keep <- months[index_keep]
years_keep <- years[index_keep]
# The Q4-only array now holds three consecutive slices (Oct, Nov, Dec) per year.
# Average each triplet into one annual-mean temperature matrix.
# Every third slice index marks the October layer of a new year:
loop_seq <- seq(1, dim(temp_q4)[3], by = 3)
# Holders for the monthly slices, their mean, and the per-year results
dlist <- list()
temp_10 <- c()
temp_11 <- c()
temp_12 <- c()
temp_ave <- c()
# For each October index i, pull Oct/Nov/Dec (slices i, i + 1, i + 2), average
# them, and store the mean at list position 1, 2, 3, ... via (i + 2) / 3
# (equivalent to the 1:n(years) mapping since i runs over 1, 4, 7, ...)
for (i in loop_seq) {
  temp_10 <- temp_q4[, , i]
  temp_11 <- temp_q4[, , i + 1]
  temp_12 <- temp_q4[, , i + 2]
  temp_ave <- (temp_10 + temp_11 + temp_12) / 3
  dlist[[(i + 2) / 3]] <- temp_ave
}
# Label each annual-mean matrix with its year so it can be looked up by name
names(dlist) <- unique(years_keep)
# Now I need to make a loop where I extract the raster value for each year...
# The cpue data is called dat so far in this script
# Filter years in the cpue data frame to only have the years I have temperature for
# (names(dlist) is character; %in% coerces year for the comparison)
d_sub_temp <- dat %>% filter(year %in% names(dlist)) %>% droplevels()
#> filter: no rows removed
# Create data holding object for the per-year extracted temperatures
data_list <- list()
# ... And for the temperature raster (one data frame per year, for plotting)
raster_list <- list()
# Create factor year for indexing the list in the loop
d_sub_temp$year_f <- as.factor(d_sub_temp$year)
# Loop through each year and extract raster values for the cpue data points.
# Per iteration: build a lon/lat raster from that year's Q4-mean temperature
# matrix, extract the value at each haul position, accumulate results in
# data_list/raster_list, and save a check-plot to disk.
# (Iterating over a factor yields its values as character; the tidylog output
# below confirms i is character.)
for(i in unique(d_sub_temp$year_f)) {
# Subset a year
temp_slice <- dlist[[i]]
# Create raster for that year (i); the matrix is [lon, lat] so transpose to the
# row-major [lat, lon] layout raster() expects
# NOTE(review): the proj4 string below contains a garbled token "+no_defs+ towgs84="
# (presumably meant "+no_defs +towgs84=") - confirm PROJ parses it as intended
r <- raster(t(temp_slice), xmn = min(lon), xmx = max(lon), ymn = min(lat), ymx = max(lat),
crs = CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs+ towgs84=0,0,0"))
# Flip... (transposing leaves latitude increasing downward; flip restores north-up)
r <- flip(r, direction = 'y')
#plot(r, main = i)
# Filter the same year (i) in the cpue data and select only coordinates
d_slice <- d_sub_temp %>% filter(year_f == i) %>% dplyr::select(lon, lat)
# Make into a SpatialPoints object
data_sp <- SpatialPoints(d_slice)
# Extract raster value (temperature)
rasValue <- raster::extract(r, data_sp)
# Now we want to plot the results of the raster extractions by plotting the cpue
# data points over a raster and saving it for each year.
# Make the SpatialPoints object into a raster again (for pl)
# NOTE(review): df is never used below - candidate for removal
df <- as.data.frame(data_sp)
# Add in the raster value in the df holding the coordinates for the cpue data
d_slice$temp <- rasValue
# Add in which year
d_slice$year <- i
# Create a index for the data last where we store all years (because our loop index
# i is not continuous, we can't use it directly)
# NOTE(review): assumes the first year is 1993 so indices run 1:n consecutively;
# breaks silently if earlier years ever enter the data
index <- as.numeric(d_slice$year)[1] - 1992
# Add each years' data in the list
data_list[[index]] <- d_slice
# Save to check each year is ok! First convert the raster to points for plotting
# (so that we can use ggplot)
map.p <- rasterToPoints(r)
# Make the points a dataframe for ggplot
df_rast <- data.frame(map.p)
# Rename y-variable and add year
df_rast <- df_rast %>% rename("temp" = "layer") %>% mutate(year = i)
# Add each years' raster data frame in the list
# (stored BEFORE the colnames() call below, so the list keeps x/y/temp/year names)
raster_list[[index]] <- df_rast
# Make appropriate column headings
# NOTE(review): df_rast has four columns here (x, y, temp, year) but only three
# names are assigned, leaving the year column unnamed; the plot below only uses
# the first three, but a rename() of x and y would be safer
colnames(df_rast) <- c("Longitude", "Latitude", "temp")
# Now make the map (not printed inside the loop; ggsave() picks it up via
# last_plot(), which ggplot2 sets when layers are added with "+" -
# NOTE(review): consider assigning the plot and passing it to ggsave explicitly)
ggplot(data = df_rast, aes(y = Latitude, x = Longitude)) +
geom_raster(aes(fill = temp)) +
geom_point(data = d_slice, aes(x = lon, y = lat, fill = temp),
color = "black", size = 5, shape = 21) +
theme_bw() +
geom_sf(data = world, inherit.aes = F, size = 0.2) +
coord_sf(xlim = c(min(dat$lon), max(dat$lon)),
ylim = c(min(dat$lat), max(dat$lat))) +
scale_colour_gradientn(colours = rev(terrain.colors(10)),
limits = c(2, 17)) +
scale_fill_gradientn(colours = rev(terrain.colors(10)),
limits = c(2, 17)) +
NULL
ggsave(paste("figures/supp/cpue_temp_rasters/", i,".png", sep = ""),
width = 6.5, height = 6.5, dpi = 600)
}
#> filter: removed 96,260 rows (99%), 1,184 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 95,617 rows (98%), 1,827 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 95,414 rows (98%), 2,030 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 95,760 rows (98%), 1,684 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 95,357 rows (98%), 2,087 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 95,643 rows (98%), 1,801 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 95,539 rows (98%), 1,905 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 94,300 rows (97%), 3,144 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 95,411 rows (98%), 2,033 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 95,517 rows (98%), 1,927 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 94,035 rows (97%), 3,409 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 92,577 rows (95%), 4,867 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 91,606 rows (94%), 5,838 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 91,782 rows (94%), 5,662 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 90,456 rows (93%), 6,988 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 91,153 rows (94%), 6,291 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 93,444 rows (96%), 4,000 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 92,677 rows (95%), 4,767 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 93,361 rows (96%), 4,083 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 94,573 rows (97%), 2,871 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 94,172 rows (97%), 3,272 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 93,243 rows (96%), 4,201 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 93,184 rows (96%), 4,260 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 91,265 rows (94%), 6,179 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 92,632 rows (95%), 4,812 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 94,156 rows (97%), 3,288 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 94,410 rows (97%), 3,034 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
# Now create a data frame from the list of all annual values
big_dat_temp <- dplyr::bind_rows(data_list)
big_raster_dat_temp <- dplyr::bind_rows(raster_list)
# Sanity-check the extracted temperature range (max ~14.5, min ~3.5 deg C,
# plausible for Baltic Q4 bottom water)
big_dat_temp %>% drop_na(temp) %>% summarise(max = max(temp))
#> drop_na: no rows removed
#> summarise: now one row and one column, ungrouped
#> # A tibble: 1 x 1
#> max
#> <dbl>
#> 1 14.5
big_dat_temp %>% drop_na(temp) %>% summarise(min = min(temp))
#> drop_na: no rows removed
#> summarise: now one row and one column, ungrouped
#> # A tibble: 1 x 1
#> min
#> <dbl>
#> 1 3.53
# Plot data, looks like there's big inter-annual variation but a positive trend
# (annual mean of the raster cells, not of the haul positions)
big_raster_dat_temp %>%
group_by(year) %>%
drop_na(temp) %>%
summarise(mean_temp = mean(temp)) %>%
mutate(year_num = as.numeric(year)) %>%
ggplot(., aes(year_num, mean_temp)) +
geom_point(size = 2) +
stat_smooth(method = "lm") +
NULL
#> group_by: one grouping variable (year)
#> drop_na (grouped): no rows removed
#> summarise: now 27 rows and 2 columns, ungrouped
#> mutate: new variable 'year_num' (double) with 27 unique values and 0% NA
#> `geom_smooth()` using formula 'y ~ x'
# Now add in the new temperature column in the original data:
# inspect both data frames before building the join key
str(d_sub_temp)
#> tibble [97,444 × 13] (S3: tbl_df/tbl/data.frame)
#> $ weight_g : num [1:97444] 1176 1187 1199 1219 1433 ...
#> $ length_cm: num [1:97444] 51 51 51 51 53 55 58 61 8 34 ...
#> $ year : int [1:97444] 1993 1993 1993 1993 1993 1993 1993 1993 1993 1993 ...
#> $ lat : num [1:97444] 54.7 54.7 54.7 54.7 54.7 ...
#> $ lon : num [1:97444] 14.1 14.1 14.1 14.1 14.1 ...
#> $ quarter : int [1:97444] 4 4 4 4 4 4 4 4 4 4 ...
#> $ IDx : chr [1:97444] "1993.4.GFR.06S1.H20.28.41" "1993.4.GFR.06S1.H20.28.41" "1993.4.GFR.06S1.H20.28.41" "1993.4.GFR.06S1.H20.28.41" ...
#> $ ices_rect: chr [1:97444] "38G4" "38G4" "38G4" "38G4" ...
#> $ SD : chr [1:97444] "24" "24" "24" "24" ...
#> $ depth : num [1:97444] 18 18 18 18 18 18 18 18 10 10 ...
#> $ id_oxy : chr [1:97444] "1993_14.1333_54.7" "1993_14.1333_54.7" "1993_14.1333_54.7" "1993_14.1333_54.7" ...
#> $ oxy : num [1:97444] 7.32 7.32 7.32 7.32 7.32 ...
#> $ year_f : Factor w/ 27 levels "1993","1994",..: 1 1 1 1 1 1 1 1 1 1 ...
str(big_dat_temp)
#> tibble [97,444 × 4] (S3: tbl_df/tbl/data.frame)
#> $ lon : num [1:97444] 14.1 14.1 14.1 14.1 14.1 ...
#> $ lat : num [1:97444] 54.7 54.7 54.7 54.7 54.7 ...
#> $ temp: num [1:97444] 7.86 7.86 7.86 7.86 7.86 ...
#> $ year: chr [1:97444] "1993" "1993" "1993" "1993" ...
# Create an ID for matching the temperature data with the cpue data
# (same year_lon_lat scheme as id_oxy; paste() coerces year to character on
# both sides so int vs chr year does not matter)
dat$id_temp <- paste(dat$year, dat$lon, dat$lat, sep = "_")
big_dat_temp$id_temp <- paste(big_dat_temp$year, big_dat_temp$lon, big_dat_temp$lat, sep = "_")
# Which id's are not in the cpue data (dat)? (It's because I don't have those years, not about the location)
ids <- dat$id_temp[!dat$id_temp %in% c(big_dat_temp$id_temp)]
# character(0) below confirms every haul has a temperature match
unique(ids)
#> character(0)
# Select only the columns we want to merge
big_dat_sub_temp <- big_dat_temp %>% dplyr::select(id_temp, temp)
# Remove duplicate ID (one temp value per id); the extraction produced one row
# per fish rather than per haul
big_dat_sub_temp2 <- big_dat_sub_temp %>% distinct(id_temp, .keep_all = TRUE)
#> distinct: removed 94,211 rows (97%), 3,233 rows remaining
# Join the data with raster-derived temperature with the full cpue data
dat <- left_join(dat, big_dat_sub_temp2, by = "id_temp")
#> left_join: added one column (temp)
#> > rows only in x 0
#> > rows only in y ( 0)
#> > matched rows 97,444
#> > ========
#> > rows total 97,444
colnames(dat)
#> [1] "weight_g" "length_cm" "year" "lat" "lon" "quarter"
#> [7] "IDx" "ices_rect" "SD" "depth" "id_oxy" "oxy"
#> [13] "id_temp" "temp"
# The join keys are no longer needed
dat <- dat %>% dplyr::select(-id_temp, -id_oxy)
# Drop NA temp (none here, since every haul matched and extraction was complete)
dat <- dat %>% drop_na(temp)
#> drop_na: no rows removed
# First add UTM coords
# Add UTM coords
# Function: project vectors of WGS84 longitude (x) and latitude (y) into UTM
# coordinates (meters) for the given zone. Returns a data frame with ID, X, Y.
LongLatToUTM <- function(x, y, zone){
  # seq_along() is safe for zero-length input (1:length(x) would yield c(1, 0))
  xy <- data.frame(ID = seq_along(x), X = x, Y = y)
  coordinates(xy) <- c("X", "Y")
  proj4string(xy) <- CRS("+proj=longlat +datum=WGS84") ## for example
  # Bug fix: the ellipsoid token was "ellps=WGS84" (missing the leading "+"),
  # which PROJ ignores; paste0() also replaces paste(..., sep = '')
  res <- spTransform(xy, CRS(paste0("+proj=utm +zone=", zone, " +ellps=WGS84")))
  return(as.data.frame(res))
}
# Project haul positions into UTM zone 33 (matches the map projection above)
utm_coords <- LongLatToUTM(dat$lon, dat$lat, zone = 33)
#> Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj =
#> prefer_proj): Discarded datum Unknown based on WGS84 ellipsoid in CRS definition
# Convert m to km so the mesh/optimizer work on well-scaled coordinates
dat$X <- utm_coords$X/1000 # for computational reasons
dat$Y <- utm_coords$Y/1000 # for computational reasons
datd <- readr::read_csv("https://raw.githubusercontent.com/maxlindmark/cod_condition/master/data/for_analysis/mdat_cpue.csv")
# Calculate standardized variables: copy the covariates to *_sc columns, then
# center/scale them (mean 0, sd 1) for the density models
d <- d %>%
mutate(oxy_sc = oxy,
temp_sc = temp,
depth_sc = depth,
) %>%
# NOTE(review): trailing comma above is tolerated by mutate() but is fragile;
# mutate_at() is superseded - across() is the modern equivalent
mutate_at(c("oxy_sc", "temp_sc", "depth_sc"),
~(scale(.) %>% as.vector)) %>%
mutate(year = as.integer(year)) %>%
drop_na(oxy, depth, temp) %>%
rename("density_cod" = "density") # to fit better with how flounder is named
# Also for the condition data which we predict on: scale with the MEAN AND SD
# OF THE MODEL-FITTING DATA (d), so predictions use the same covariate scale
dat <- dat %>%
mutate(oxy_sc = (oxy - mean(d$oxy))/sd(d$oxy),
temp_sc = (temp - mean(d$temp))/sd(d$temp),
depth_sc = (depth - mean(d$depth))/sd(d$depth)) %>%
mutate(year = as.integer(year))
# Quick visual check of the scaled covariates
hist(dat$oxy_sc)
hist(dat$temp_sc)
hist(dat$depth_sc)
# Explore data a bit: cod density over time by ICES subdivision
ggplot(d) +
geom_point(aes(year, density_cod, color = factor(SD))) +
facet_wrap(~SD)
# Explore data a bit: flounder density over time by ICES subdivision
ggplot(d) +
geom_point(aes(year, density_fle, color = factor(SD))) +
facet_wrap(~SD)
# Make mesh: SPDE mesh on the km-scaled UTM coordinates, shared by both
# species models; fixed seed makes the kmeans knot placement reproducible
spde <- make_mesh(d, xy_cols = c("X", "Y"),
n_knots = 200,
type = "kmeans", seed = 42)
plot(spde)
# Depth spline + oxy spline
# Cod model: Tweedie GLMM with year fixed effects (no intercept), penalized
# smooths of scaled depth/oxygen/temperature, a static spatial field and an
# AR1 spatiotemporal field
# NOTE(review): spde=, fields=, include_spatial= and spatial_only= match an
# older sdmTMB API (renamed mesh/spatiotemporal/spatial in later versions) -
# pin the package version if this script is rerun
mcod <- sdmTMB(density_cod ~ 0 + as.factor(year) + s(depth_sc, k = 3) + s(oxy_sc, k = 3) + s(temp_sc, k = 3),
data = d, spde = spde, family = tweedie(link = "log"),
fields = "AR1", include_spatial = TRUE, time = "year",
spatial_only = FALSE, reml = FALSE,
control = sdmTMBcontrol(newton_steps = 1))
#> Warning: Detected a `s()` smoother. Smoothers are penalized in sdmTMB as
#> of version 0.0.19, but used to be unpenalized.
#> You no longer need to specify `k`, specifing `k` now indicates
#> an upper limit on the basis functions, and the degree of wiggliness
#> is determined by the data.
# Fixed-effect estimates with 95% CIs (year effects on the log scale)
tidy(mcod, conf.int = TRUE)
#> term estimate std.error conf.low conf.high
#> 1 as.factor(year)1993 4.669278 0.4290490 3.828358 5.510199
#> 2 as.factor(year)1994 5.096382 0.4102930 4.292222 5.900542
#> 3 as.factor(year)1995 5.569538 0.4033938 4.778901 6.360175
#> 4 as.factor(year)1996 5.062511 0.4049442 4.268835 5.856187
#> 5 as.factor(year)1997 4.356420 0.3877978 3.596350 5.116489
#> 6 as.factor(year)1998 4.421859 0.3943089 3.649028 5.194691
#> 7 as.factor(year)1999 4.290892 0.3823467 3.541506 5.040278
#> 8 as.factor(year)2000 4.648275 0.3623439 3.938094 5.358456
#> 9 as.factor(year)2001 5.019057 0.3520562 4.329040 5.709075
#> 10 as.factor(year)2002 5.583997 0.3423569 4.912990 6.255004
#> 11 as.factor(year)2003 4.404675 0.3672783 3.684823 5.124528
#> 12 as.factor(year)2004 4.879646 0.3706699 4.153146 5.606146
#> 13 as.factor(year)2005 5.319951 0.3383369 4.656823 5.983079
#> 14 as.factor(year)2006 5.019436 0.3208472 4.390587 5.648285
#> 15 as.factor(year)2007 5.280742 0.3169020 4.659625 5.901858
#> 16 as.factor(year)2008 5.414750 0.3119337 4.803371 6.026129
#> 17 as.factor(year)2009 5.386120 0.3198729 4.759181 6.013059
#> 18 as.factor(year)2010 5.512173 0.3229510 4.879201 6.145146
#> 19 as.factor(year)2011 4.769352 0.3193194 4.143497 5.395206
#> 20 as.factor(year)2012 4.346913 0.3272673 3.705481 4.988345
#> 21 as.factor(year)2013 4.769957 0.3239729 4.134982 5.404932
#> 22 as.factor(year)2014 4.497734 0.3228297 3.864999 5.130469
#> 23 as.factor(year)2015 4.627084 0.3233500 3.993330 5.260838
#> 24 as.factor(year)2016 3.949509 0.3235552 3.315353 4.583666
#> 25 as.factor(year)2017 4.478004 0.3202236 3.850378 5.105631
#> 26 as.factor(year)2018 3.530848 0.3255207 2.892839 4.168857
#> 27 as.factor(year)2019 3.515000 0.3766067 2.776865 4.253136
#> 28 s(depth_sc).1 NA NA NA NA
#> 29 s(depth_sc).2 NA NA NA NA
#> 30 s(oxy_sc).1 NA NA NA NA
#> 31 s(oxy_sc).2 NA NA NA NA
#> 32 s(temp_sc).1 NA NA NA NA
#> 33 s(temp_sc).2 NA NA NA NA
# Check residuals of models: residuals() on the fitted object; if roughly
# standard normal the QQ points should fall on the 1:1 line
d$residualsmcod <- residuals(mcod)
#> Warning: Detected a `s()` smoother. Smoothers are penalized in sdmTMB as
#> of version 0.0.19, but used to be unpenalized.
#> You no longer need to specify `k`, specifing `k` now indicates
#> an upper limit on the basis functions, and the degree of wiggliness
#> is determined by the data.
qqnorm(d$residualsmcod); abline(a = 0, b = 1)
# Flounder model: same structure as the cod model above (Tweedie, year fixed
# effects, covariate smooths, static + AR1 spatiotemporal fields)
mfle <- sdmTMB(density_fle ~ 0 + as.factor(year) + s(depth_sc, k = 3) + s(oxy_sc, k = 3) + s(temp_sc, k = 3),
data = d, spde = spde, family = tweedie(link = "log"),
fields = "AR1", include_spatial = TRUE, time = "year",
spatial_only = FALSE, reml = FALSE,
control = sdmTMBcontrol(newton_steps = 1))
#> Warning: Detected a `s()` smoother. Smoothers are penalized in sdmTMB as
#> of version 0.0.19, but used to be unpenalized.
#> You no longer need to specify `k`, specifing `k` now indicates
#> an upper limit on the basis functions, and the degree of wiggliness
#> is determined by the data.
#> Warning: The model may not have converged. Maximum final gradient:
#> 0.0197440872310777.
# NOTE(review): the convergence warning above (max gradient ~0.02) persists
# through the predictions below - consider more newton_steps or a simplified
# model before trusting the flounder estimates
tidy(mfle, conf.int = TRUE)
#> term estimate std.error conf.low conf.high
#> 1 as.factor(year)1993 3.086344 0.5000604 2.106244 4.066445
#> 2 as.factor(year)1994 2.862403 0.4982786 1.885795 3.839011
#> 3 as.factor(year)1995 3.834157 0.4826551 2.888170 4.780144
#> 4 as.factor(year)1996 3.488019 0.4869127 2.533688 4.442351
#> 5 as.factor(year)1997 3.402728 0.4630563 2.495154 4.310302
#> 6 as.factor(year)1998 2.792034 0.4676152 1.875525 3.708543
#> 7 as.factor(year)1999 2.268694 0.4644380 1.358412 3.178975
#> 8 as.factor(year)2000 3.013916 0.4451605 2.141417 3.886414
#> 9 as.factor(year)2001 3.388237 0.4380149 2.529743 4.246730
#> 10 as.factor(year)2002 4.015457 0.4294779 3.173696 4.857218
#> 11 as.factor(year)2003 3.104267 0.4412371 2.239458 3.969076
#> 12 as.factor(year)2004 3.637542 0.4477572 2.759954 4.515130
#> 13 as.factor(year)2005 3.511923 0.4221554 2.684514 4.339333
#> 14 as.factor(year)2006 3.529843 0.4050171 2.736024 4.323662
#> 15 as.factor(year)2007 3.708212 0.4028041 2.918730 4.497693
#> 16 as.factor(year)2008 3.793654 0.3996016 3.010449 4.576859
#> 17 as.factor(year)2009 3.887496 0.4047775 3.094147 4.680846
#> 18 as.factor(year)2010 4.329963 0.4028294 3.540432 5.119494
#> 19 as.factor(year)2011 3.884919 0.3994758 3.101961 4.667877
#> 20 as.factor(year)2012 3.431478 0.4023971 2.642794 4.220162
#> 21 as.factor(year)2013 3.730560 0.4046115 2.937536 4.523584
#> 22 as.factor(year)2014 3.293708 0.4027768 2.504280 4.083136
#> 23 as.factor(year)2015 3.465576 0.4023112 2.677061 4.254092
#> 24 as.factor(year)2016 3.346314 0.4008326 2.560696 4.131931
#> 25 as.factor(year)2017 3.571983 0.4026322 2.782838 4.361127
#> 26 as.factor(year)2018 3.296749 0.4058768 2.501245 4.092253
#> 27 as.factor(year)2019 3.392583 0.4387341 2.532680 4.252486
#> 28 s(depth_sc).1 NA NA NA NA
#> 29 s(depth_sc).2 NA NA NA NA
#> 30 s(oxy_sc).1 NA NA NA NA
#> 31 s(oxy_sc).2 NA NA NA NA
#> 32 s(temp_sc).1 NA NA NA NA
#> 33 s(temp_sc).2 NA NA NA NA
# Check residuals of models: same QQ check as for cod above
d$residualsmfle <- residuals(mfle)
#> Warning: Detected a `s()` smoother. Smoothers are penalized in sdmTMB as
#> of version 0.0.19, but used to be unpenalized.
#> You no longer need to specify `k`, specifing `k` now indicates
#> an upper limit on the basis functions, and the degree of wiggliness
#> is determined by the data.
qqnorm(d$residualsmfle); abline(a = 0, b = 1)
# Read in pred_grid2 which has oxygen values at location and time and depth:
# the spatial prediction grid (one row per cell and year) from the project repo
pred_grid2 <- readr::read_csv("https://raw.githubusercontent.com/maxlindmark/cod_condition/master/data/for_analysis/pred_grid2.csv")
#>
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#> X = col_double(),
#> Y = col_double(),
#> depth = col_double(),
#> year = col_double(),
#> deep = col_character(),
#> oxy = col_double(),
#> temp = col_double(),
#> lon = col_double(),
#> lat = col_double(),
#> subdiv = col_character(),
#> subdiv2 = col_character(),
#> SubDiv = col_double()
#> )
# Standardize data with respect to prediction grid: restrict to the model
# years and scale covariates using the MODEL DATA's (d) mean and sd so the
# grid is on the same covariate scale the models were fit on
pred_grid2 <- pred_grid2 %>%
mutate(year = as.integer(year)) %>%
filter(year %in% c(unique(d$year))) %>%
mutate(depth_sc = (depth - mean(d$depth))/sd(d$depth),
temp_sc = (temp - mean(d$temp))/sd(d$temp),
oxy_sc = (oxy - mean(d$oxy))/sd(d$oxy)) %>% # Need to scale these to the mean and sd in the data!
drop_na(oxy, depth, temp)
#> mutate: converted 'year' from double to integer (0 new NA)
#> filter: no rows removed
#> mutate: new variable 'depth_sc' (double) with 6,565 unique values and 0% NA
#> new variable 'temp_sc' (double) with 252,019 unique values and 3% NA
#> new variable 'oxy_sc' (double) with 252,208 unique values and 3% NA
#> drop_na: removed 9,477 rows (4%), 250,587 rows remaining
# Predict on the pred grid, calculate index
# area = 2*2: per-cell area weighting - presumably 2 x 2 km grid cells given
# the km-scaled X/Y; TODO confirm against the grid construction script
preds_fle <- predict(mfle, newdata = pred_grid2, return_tmb_object = TRUE, area = 2*2)
#> Warning: Detected a `s()` smoother. Smoothers are penalized in sdmTMB as
#> of version 0.0.19, but used to be unpenalized.
#> You no longer need to specify `k`, specifing `k` now indicates
#> an upper limit on the basis functions, and the degree of wiggliness
#> is determined by the data.
index <- get_index(preds_fle, bias_correct = FALSE)
#> Warning: The model may not have converged. Maximum final gradient:
#> 0.0197440866758738.
# Convert from kg to tonnes for plotting
index <- index %>% mutate(est_t = est/1000, lwr_t = lwr/1000, upr_t = upr/1000)
#> mutate: new variable 'est_t' (double) with 27 unique values and 0% NA
#> new variable 'lwr_t' (double) with 27 unique values and 0% NA
#> new variable 'upr_t' (double) with 27 unique values and 0% NA
# Plot index (annual flounder biomass index with CI ribbon)
ggplot() +
geom_line(data = index, aes(year, est_t, color = "blue")) +
geom_ribbon(data = index, aes(year, ymin = lwr_t, ymax = upr_t, fill = "blue"), alpha = 0.4) +
scale_x_continuous(breaks = scales::pretty_breaks(n = 20)) +
scale_color_brewer(palette = "Dark2") +
scale_fill_brewer(palette = "Dark2") +
guides(color = FALSE) +
labs(x = "Year", y = "Tonnes", fill = "Sub Divisions", label = "") +
theme_light(base_size = 11) +
theme(axis.text.x = element_text(angle = 30),
legend.position = c(0.8, 0.8),
legend.background = element_blank()) +
NULL
# Calculate an index that corresponds to the average so that I can compare it to the data
# Number of grid cells in a single (the latest) year of the prediction grid
ncells <- pred_grid2 %>%
  filter(year == max(pred_grid2$year)) %>%
  nrow()
#> filter: removed 241,306 rows (96%), 9,281 rows remaining
# With area = 1/ncells the summed index corresponds to a spatial average
preds_fle_ave <- predict(mfle, newdata = pred_grid2, return_tmb_object = TRUE, area = 1/ncells)
#> Warning: Detected a `s()` smoother. Smoothers are penalized in sdmTMB as
#> of version 0.0.19, but used to be unpenalized.
#> You no longer need to specify `k`, specifing `k` now indicates
#> an upper limit on the basis functions, and the degree of wiggliness
#> is determined by the data.
index_ave <- get_index(preds_fle_ave, bias_correct = FALSE)
#> Warning: The model may not have converged. Maximum final gradient:
#> 0.0197440866758738.
# Yearly mean of the observed flounder densities in the CPUE data,
# for comparison against the model-based average index computed above
d_sum <- d %>%
ungroup() %>%
group_by(year) %>%
summarise(mean_density_fle = mean(density_fle))
#> ungroup: no grouping variables
#> group_by: one grouping variable (year)
#> summarise: now 27 rows and 2 columns, ungrouped
# Compare the model-based average density with the raw yearly mean of the data.
# BUG FIX: `size = 1.1` was mapped inside aes(), which treats it as a data
# aesthetic and creates a spurious legend; a constant size belongs outside aes().
# Also: guides(fill = FALSE) is deprecated; use "none".
ggplot(index_ave, aes(year, est, color = "prediction")) +
  geom_line() +
  geom_ribbon(aes(ymin = lwr, ymax = upr, fill = "prediction"), alpha = 0.1) +
  geom_point(data = d_sum, aes(year, mean_density_fle, color = "data"), size = 1.1) +
  geom_line(data = d_sum, aes(year, mean_density_fle, color = "data"), linetype = 2) +
  scale_color_brewer(palette = "Dark2") +
  scale_fill_brewer(palette = "Dark2") +
  guides(fill = "none") +
  labs(x = "Year", y = expression(paste("Density [kg/", km^2, "]", sep = ""))) +
  NULL
# Plot predictions on map
# (X/Y appear to be UTM33 in km, hence the * 1000 to metres — TODO confirm)
ggplot(swe_coast_proj) +
  geom_raster(data = preds_fle$data, aes(x = X * 1000, y = Y * 1000, fill = exp(est))) +
  geom_sf(size = 0.3) +
  scale_fill_viridis_c(trans = "sqrt") +
  facet_wrap(~ year, ncol = 5) +
  labs(x = "Longitude", y = "Latitude", fill = expression(kg/km^2),
       title = "Predicted density (fixed + random)")
## Interesting! Now predict these models on dat - i.e. the condition data, so
## the predicted cod and flounder densities can be used as covariates there
predict_mcod <- predict(mcod, newdata = dat)
#> Warning: Detected a `s()` smoother. Smoothers are penalized in sdmTMB as
#> of version 0.0.19, but used to be unpenalized.
#> You no longer need to specify `k`, specifing `k` now indicates
#> an upper limit on the basis functions, and the degree of wiggliness
#> is determined by the data.
predict_mfle <- predict(mfle, newdata = dat)
#> Warning: Detected a `s()` smoother. Smoothers are penalized in sdmTMB as
#> of version 0.0.19, but used to be unpenalized.
#> You no longer need to specify `k`, specifing `k` now indicates
#> an upper limit on the basis functions, and the degree of wiggliness
#> is determined by the data.
# Attach the exponentiated predictions to the condition data
dat <- dat %>%
  mutate(density_cod = exp(predict_mcod$est),
         density_fle = exp(predict_mfle$est))
# Test to plot the relationships between a simple condition index and these variables
# cond = weight_g / (0.01 * length_cm^3), i.e. 100 * W / L^3 (a Fulton-type condition factor)
dat <- dat %>%
  mutate(cond = weight_g / (0.01 * length_cm^3))
# Remove apparent errors!
# Inspect the 20 largest condition values first; the top rows (e.g. 274 g at
# 3 cm, cond > 1000) are clearly recording errors
head(data.frame(arrange(dat, desc(cond))), 20)
#> weight_g length_cm year lat lon quarter IDx
#> 1 274.0 3 2015 55.7708 15.2827 4 2015.4.DEN.26D4.TVL.35.4
#> 2 1038.0 20 2015 55.1910 14.3030 4 2015.4.GFR.06SL.TVS.24275.43
#> 3 59.0 8 2015 54.7361 15.9506 4 2015.4.DEN.26D4.TVL.177.29
#> 4 2001.0 26 2007 55.8300 20.1000 4 2007.4.LTU.LTDA.TVS.Nov.7
#> 5 882.0 21 2006 55.7975 16.1985 4 2006.4.DEN.26D4.TVL.51.22
#> 6 314.0 15 2011 55.6498 14.6522 4 2011.4.DEN.26D4.TVL.24.30
#> 7 730.0 20 2006 55.6865 14.4562 4 2006.4.SWE.77AR.TVL.691.27
#> 8 4.0 4 2016 55.6692 14.7286 4 2016.4.DEN.26D4.TVL.18.2
#> 9 4.0 4 2016 55.6692 14.7286 4 2016.4.DEN.26D4.TVL.18.2
#> 10 6.0 5 2019 55.2762 13.7423 4 2019.4.GFR.06SL.TVS.24263.44
#> 11 2.6 4 2016 55.7647 15.3963 4 2016.4.DEN.26D4.TVL.33.5
#> 12 2.6 4 2016 55.7647 15.3963 4 2016.4.DEN.26D4.TVL.33.5
#> 13 199.0 17 2007 57.6500 21.2900 4 2007.4.EST.AA36.TVS.2.9
#> 14 5.0 5 2016 55.6692 14.7286 4 2016.4.DEN.26D4.TVL.18.2
#> 15 5.0 5 2016 55.6692 14.7286 4 2016.4.DEN.26D4.TVL.18.2
#> 16 1.0 3 2005 55.7128 16.7263 4 2005.4.SWE.77AR.TVL.608.9
#> 17 8.0 6 2012 54.8184 14.9531 4 2012.4.DEN.26D4.TVL.114.30
#> 18 1.0 3 2014 55.6985 14.3635 4 2014.4.SWE.26D4.TVL.29.15
#> 19 64.0 12 2015 55.4900 20.6466 4 2015.4.LTU.LTDA.TVS.26153.2
#> 20 1.0 3 2019 55.7087 16.1862 4 2019.4.DEN.26D4.TVL.147.39
#> ices_rect SD depth oxy temp X Y oxy_sc
#> 1 40G5 25 54 4.211386 8.335124 517.7356 6180.607 -0.45068187
#> 2 39G4 24 32 3.580219 12.520613 455.6264 6116.268 -0.73873926
#> 3 38G5 25 40 5.156504 8.856022 561.2060 6065.840 -0.01934049
#> 4 40H0 26 54 6.793494 6.698404 819.3147 6198.930 0.72776304
#> 5 40G6 25 52 2.523286 8.088043 575.1362 6184.192 -1.22111152
#> 6 40G4 25 57 3.699946 6.867472 478.1128 6167.159 -0.68409721
#> 7 40G4 25 59 5.966464 6.644550 465.8105 6171.323 0.35031593
#> 8 40G4 25 60 3.376058 11.666626 482.9291 6169.297 -0.83191612
#> 9 40G4 25 60 3.376058 11.666626 482.9291 6169.297 -0.83191612
#> 10 39G3 24 27 5.246214 13.486569 420.1026 6126.248 0.02160206
#> 11 40G5 25 54 2.664679 11.883115 524.8663 6179.963 -1.15658153
#> 12 40G5 25 54 2.664679 11.883115 524.8663 6179.963 -1.15658153
#> 13 44H1 28 29 7.524208 8.560642 875.0945 6407.166 1.06125291
#> 14 40G4 25 60 3.376058 11.666626 482.9291 6169.297 -0.83191612
#> 15 40G4 25 60 3.376058 11.666626 482.9291 6169.297 -0.83191612
#> 16 40G6 25 40 7.730908 6.096197 608.4566 6175.466 1.15558852
#> 17 38G4 24 43 5.310305 8.318024 496.9863 6074.584 0.05085235
#> 18 40G4 25 31 6.831249 10.660970 459.9947 6172.708 0.74499401
#> 19 39H0 26 44 7.459182 7.222315 856.5823 6163.819 1.03157590
#> 20 40G6 25 61 2.813527 10.754423 574.5343 6174.297 -1.08864911
#> temp_sc depth_sc density_cod density_fle cond
#> 1 0.01853258 0.32469272 1803.01035 46.451333 1014.814815
#> 2 1.79657189 -0.50466319 670.08589 169.075678 12.975000
#> 3 0.23981581 -0.20307923 1221.74913 29.591264 11.523438
#> 4 -0.67676290 0.32469272 981.17104 192.751880 11.384843
#> 5 -0.08642965 0.24929673 603.98199 5.011336 9.523810
#> 6 -0.60494098 0.43778671 1762.30606 112.708677 9.303704
#> 7 -0.69964084 0.51318270 830.73750 29.105067 9.125000
#> 8 1.43378931 0.55088070 867.55963 55.828338 6.250000
#> 9 1.43378931 0.55088070 867.55963 55.828338 6.250000
#> 10 2.20692020 -0.69315318 296.80415 336.644195 4.800000
#> 11 1.52575615 0.32469272 550.56935 31.413118 4.062500
#> 12 1.52575615 0.32469272 550.56935 31.413118 4.062500
#> 13 0.11433520 -0.61775718 77.12135 2724.817167 4.050478
#> 14 1.43378931 0.55088070 867.55963 55.828338 4.000000
#> 15 1.43378931 0.55088070 867.55963 55.828338 4.000000
#> 16 -0.93258677 -0.20307923 575.42186 3.910974 3.703704
#> 17 0.01126845 -0.08998524 532.04545 45.191306 3.703704
#> 18 1.00657635 -0.54236119 1191.06609 332.984641 3.703704
#> 19 -0.45419986 -0.05228724 384.22827 205.259755 3.703704
#> 20 1.04627620 0.58857869 90.53403 7.152002 3.703704
# Drop implausibly high condition values (threshold 5 based on the inspection above)
dat <- dat %>% filter(cond < 5)
# Quick look at condition vs predicted cod and flounder densities
ggplot(dat) +
  geom_point(aes(density_cod, cond), alpha = 0.2) +
  ggtitle("cod")
ggplot(dat) +
  geom_point(aes(density_fle, cond), alpha = 0.2) +
  ggtitle("fle")
# Now remove the standardized variables, because when I fit the real model, I want to
# standardize them to the prediction grid, not the CPUE data, and I would forget that
# if I don't remove them now.
# BUG FIX: the minus sign was only applied to oxy_sc, so temp_sc and depth_sc
# were never dropped (they are still visible in the data printed further down).
# Negate all three so they are actually removed.
dat <- dat %>% dplyr::select(-oxy_sc, -temp_sc, -depth_sc)
# left_join from the CPUE data using haul id
# BUG FIX: the comment above had been pasted onto the start of the assignment,
# producing a syntax error and leaving d_cod_select undefined; restored here.
d_cod_select <- d %>% dplyr::select(density_cod, IDx) %>% rename("density_cod_data" = "density_cod")
d_fle_select <- d %>% dplyr::select(density_fle, IDx) %>% rename("density_fle_data" = "density_fle")
dat <- left_join(dat, d_cod_select)
dat <- left_join(dat, d_fle_select)
# Check how well the haul-level CPUE data correspond to the model predictions.
# BUG FIX: the first plot compared cod data to the *flounder* prediction
# (density_fle); it should compare cod data to the cod prediction.
ggplot(dat, aes(density_cod_data, density_cod)) + geom_point() + geom_abline(color = "red")
ggplot(dat, aes(density_fle_data, density_fle)) + geom_point() + geom_abline(color = "red")
# Read data: BIAS pelagic abundances by ICES rectangle
# (sheet 1 = sprat, sheet 2 = herring)
bias_file <- "data/BIAS/abundances_rectangles_1991-2019.xlsx"
spr <- read_xlsx(bias_file, sheet = 1) %>%
  rename("StatRec" = "Rec") %>%
  mutate(StatRec = as.factor(StatRec),
         Species = "Sprat",
         # Total abundance over ages 1-8+ (age 0 deliberately omitted)
         abun_spr = `Age 1` + `Age 2` + `Age 3` + `Age 4` + `Age 5` + `Age 6` + `Age 7` + `Age 8+`,
         IDr = paste(StatRec, Year, sep = ".")) # rectangle-year ID
her <- read_xlsx(bias_file, sheet = 2) %>%
  as.data.frame() %>%
  rename("StatRec" = "Rect2") %>% # the rectangle column is named differently in this sheet
  mutate(StatRec = as.factor(StatRec),
         Species = "Herring",
         # Total abundance over ages 1-8+ (age 0 deliberately omitted)
         abun_her = `Age 1` + `Age 2` + `Age 3` + `Age 4` + `Age 5` + `Age 6` + `Age 7` + `Age 8+`,
         IDr = paste(StatRec, Year, sep = ".")) # rectangle-year ID
# Plot sprat distribution over time in the whole area.
# FIX: use the data-masked column StatRec inside the pipe rather than
# spr$StatRec — the dollar form bypasses the pipeline and would silently
# misalign rows if a filter() were ever moved before the mutate().
spr %>%
  mutate(lon = ices.rect(StatRec)$lon,
         lat = ices.rect(StatRec)$lat) %>%
  filter(!StatRec %in% c("41G0", "41G1", "41G2", "42G1", "42G2", "43G1", "43G2", "44G0", "44G1")) %>%
  ggplot(., aes(lon, lat, fill = log(abun_spr))) +
  geom_raster() +
  scale_fill_viridis() +
  facet_wrap(~ Year, ncol = 5) +
  geom_sf(data = world, inherit.aes = F, size = 0.2) +
  coord_sf(xlim = c(xmin, xmax), ylim = c(ymin, ymax)) +
  labs(x = "lon", y = "lat") +
  NULL
ggsave("figures/supp/spr_distribution.png", width = 10, height = 10, dpi = 600)
# Plot herring distribution over time in the whole area.
# FIX: use the data-masked column StatRec inside the pipe rather than
# her$StatRec (same robustness fix as for sprat).
her %>%
  mutate(lon = ices.rect(StatRec)$lon,
         lat = ices.rect(StatRec)$lat) %>%
  filter(! StatRec %in% c("41G0", "41G1", "41G2", "42G1", "42G2", "43G1", "43G2", "44G0", "44G1")) %>%
  ggplot(., aes(lon, lat, fill = log(abun_her))) +
  geom_raster() +
  scale_fill_viridis() +
  facet_wrap(~ Year, ncol = 5) +
  geom_sf(data = world, inherit.aes = F, size = 0.2) +
  coord_sf(xlim = c(xmin, xmax), ylim = c(ymin, ymax)) +
  labs(x = "lon", y = "lat") +
  NULL
ggsave("figures/supp/her_distribution.png", width = 10, height = 10, dpi = 600)
# As can be seen in the above plots, we don't have data for all rectangles in all years
# It is important those rectangles are made NA - not 0 - when merging
# Check distribution of data
# https://www.researchgate.net/publication/47933620_Environmental_factors_and_uncertainty_in_fisheries_management_in_the_northern_Baltic_Sea/figures?lo=1
sort(unique(spr$SD))
#> [1] "21" "22" "23" "24" "25" "26" "27" "28_2" "29" "30"
#> [11] "31" "32"
sort(unique(her$SD))
#> [1] "21" "22" "23" "24" "25" "26" "27" "28_2" "29" "30"
#> [11] "31" "32"
# How many unique rows per IDr? (bars count rows carrying each within-IDr count)
her %>%
  add_count(IDr) %>%
  ggplot(aes(factor(n))) + geom_bar()
spr %>%
  add_count(IDr) %>%
  ggplot(aes(factor(n))) + geom_bar()
# Ok, some ID's with two rows...
# Show the duplicated rectangle-year rows to see where they come from
spr %>%
  add_count(IDr) %>%
  filter(n == 2) %>%
  as.data.frame() %>%
  head(20)
#> Year SD StatRec Area Age 0 Age 1 Age 2 Age 3 Age 4 Age 5 Age 6 Age 7
#> 1 1991 23 39G2 NA 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.0
#> 2 1991 24 39G2 NA 0.72 4.54 2.84 1.81 0.44 0.41 0.00 0.0
#> 3 1991 24 39G4 NA 0.00 85.12 96.36 69.69 20.55 15.87 0.70 1.4
#> 4 1991 25 39G4 NA 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.0
#> 5 1993 21 41G0 NA 0.00 0.01 0.01 0.00 0.00 0.00 0.00 0.0
#> 6 1993 21 41G2 NA 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.0
#> 7 1993 22 41G0 NA 66.88 6.35 2.87 0.74 0.28 0.00 0.00 0.0
#> 8 1993 23 39G2 NA 0.04 0.29 1.05 1.28 0.23 0.04 0.01 0.0
#> 9 1993 23 41G2 NA 0.00 0.04 0.37 0.47 0.04 0.05 0.00 0.0
#> 10 1993 24 39G2 NA 1.40 10.33 37.19 45.16 8.25 1.24 0.23 0.0
#> 11 1994 21 41G0 NA 1.01 6.79 1.58 0.00 0.00 0.00 0.00 0.0
#> 12 1994 21 41G1 NA 30.94 208.78 48.64 0.00 0.00 0.00 0.00 0.0
#> 13 1994 21 41G2 NA 2.63 12.74 4.24 0.00 0.00 0.00 0.00 0.0
#> 14 1994 22 41G0 NA 11.09 5.60 7.05 0.08 0.02 0.00 0.00 0.0
#> 15 1994 22 41G1 NA 0.61 0.31 0.39 0.00 0.00 0.00 0.00 0.0
#> 16 1994 23 39G2 NA 9.62 0.31 0.00 0.30 0.18 0.10 0.04 0.0
#> 17 1994 23 41G2 NA 42.55 0.88 0.00 0.00 0.00 0.00 0.00 0.0
#> 18 1994 24 39G2 NA 14.21 0.46 0.00 0.44 0.27 0.15 0.06 0.0
#> 19 1994 24 39G4 NA 71.57 12.04 39.94 254.48 222.32 43.48 11.83 0.0
#> 20 1994 25 39G4 NA 1.04 0.00 3.46 7.77 6.57 7.30 3.13 0.0
#> Age 8+ 1+ Species abun_spr IDr n
#> 1 0.00 0.00 Sprat 0.00 39G2.1991 2
#> 2 0.06 10.10 Sprat 10.10 39G2.1991 2
#> 3 3.43 293.12 Sprat 293.12 39G4.1991 2
#> 4 0.00 0.00 Sprat 0.00 39G4.1991 2
#> 5 0.00 0.02 Sprat 0.02 41G0.1993 2
#> 6 0.00 0.00 Sprat 0.00 41G2.1993 2
#> 7 0.00 10.24 Sprat 10.24 41G0.1993 2
#> 8 0.00 2.90 Sprat 2.90 39G2.1993 2
#> 9 0.00 0.97 Sprat 0.97 41G2.1993 2
#> 10 0.00 102.40 Sprat 102.40 39G2.1993 2
#> 11 0.00 8.37 Sprat 8.37 41G0.1994 2
#> 12 0.00 257.42 Sprat 257.42 41G1.1994 2
#> 13 0.00 16.98 Sprat 16.98 41G2.1994 2
#> 14 0.00 12.75 Sprat 12.75 41G0.1994 2
#> 15 0.00 0.70 Sprat 0.70 41G1.1994 2
#> 16 0.00 0.93 Sprat 0.93 39G2.1994 2
#> 17 0.00 0.88 Sprat 0.88 41G2.1994 2
#> 18 0.00 1.38 Sprat 1.38 39G2.1994 2
#> 19 0.00 584.09 Sprat 584.09 39G4.1994 2
#> 20 1.67 29.90 Sprat 29.90 39G4.1994 2
# Seems to be due to rectangles somehow being in different sub divisions.
# I need to group by IDr and summarize
# First check all rectangles with more than one row have two rows and not more
nrow(spr)
#> [1] 2797
nrow(spr %>% add_count(IDr) %>% filter(n == 2))
#> [1] 204
nrow(spr %>% add_count(IDr) %>% filter(n != 1))
#> [1] 204
# Collapse duplicated rectangle-years (rectangles split across sub-divisions)
spr_sum <- spr %>%
  group_by(IDr) %>%
  summarise(abun_spr = sum(abun_spr)) %>% # Sum abundance within IDr
  distinct(IDr, .keep_all = TRUE) %>% # Remove duplicate IDr (defensive; summarise already yields one row per group)
  mutate(ID_temp = IDr) %>% # Temporary copy of IDr, split below to recover StatRec and Year
  # BUG FIX: sep = 4 split at a fixed character position, which left the dot
  # attached to Year (".1991" in the check below); split on the literal "." instead
  separate(ID_temp, c("StatRec", "Year"), sep = "\\.")
# Sanity checks: 204 duplicated rows collapse pairwise, so 2797 - 204/2 = 2695
nrow(spr_sum)
#> [1] 2695
nrow(spr)
#> [1] 2797
nrow(spr %>% group_by(IDr) %>% mutate(n = n()) %>% filter(n == 2))
#> [1] 204
filter(spr_sum, IDr == "39G2.1991")
#> # A tibble: 1 x 4
#> IDr abun_spr StatRec Year
#> <chr> <dbl> <chr> <chr>
#> 1 39G2.1991 10.1 39G2 .1991
# NOTE(review): Year comes out as ".1991" because separate() used sep = 4
# (a character position), leaving the dot attached; consider sep = "\\."
filter(spr, IDr == "39G2.1991")
#> # A tibble: 2 x 17
#> Year SD StatRec Area `Age 0` `Age 1` `Age 2` `Age 3` `Age 4` `Age 5`
#> <dbl> <chr> <fct> <lgl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1991 23 39G2 NA 0 0 0 0 0 0
#> 2 1991 24 39G2 NA 0.72 4.54 2.84 1.81 0.44 0.41
#> # … with 7 more variables: `Age 6` <dbl>, `Age 7` <dbl>, `Age 8+` <dbl>,
#> # `1+` <dbl>, Species <chr>, abun_spr <dbl>, IDr <chr>
# This should equal 1 (new # rows = old - duplicated IDr)
nrow(spr_sum) / (nrow(spr) - 0.5*nrow(spr %>% group_by(IDr) %>% mutate(n = n()) %>% filter(n == 2)))
#> [1] 1
# How many rows per rectangle? (should be exactly one per IDr now)
spr_sum %>%
group_by(IDr) %>%
mutate(n = n()) %>%
ungroup() %>%
distinct(n)
#> # A tibble: 1 x 1
#> n
#> <int>
#> 1 1
# Now do the same for herring
nrow(her)
#> [1] 2797
nrow(her %>% add_count(IDr) %>% filter(n == 2))
#> [1] 204
nrow(her %>% add_count(IDr) %>% filter(n != 1))
#> [1] 204
# Collapse duplicated rectangle-years for herring as well
her_sum <- her %>%
  group_by(IDr) %>%
  summarise(abun_her = sum(abun_her)) %>% # Sum abundance within IDr
  distinct(IDr, .keep_all = TRUE) %>% # Remove duplicate IDr (defensive; summarise already yields one row per group)
  mutate(ID_temp = IDr) %>% # Temporary copy of IDr, split below to recover StatRec and Year
  # BUG FIX: sep = 4 split at a fixed character position, which left the dot
  # attached to Year (".1991" in the check below); split on the literal "." instead
  separate(ID_temp, c("StatRec", "Year"), sep = "\\.")
# Same sanity checks for herring: 2797 - 204/2 = 2695
nrow(her_sum)
#> [1] 2695
nrow(her)
#> [1] 2797
nrow(her %>% group_by(IDr) %>% mutate(n = n()) %>% filter(n == 2))
#> [1] 204
filter(her_sum, IDr == "39G2.1991")
#> # A tibble: 1 x 4
#> IDr abun_her StatRec Year
#> <chr> <dbl> <chr> <chr>
#> 1 39G2.1991 200. 39G2 .1991
# NOTE(review): Year is ".1991" here too — artifact of separate(sep = 4)
filter(her, IDr == "39G2.1991")
#> Year SD StatRec Area Age 0 Age 1 Age 2 Age 3 Age 4 Age 5 Age 6 Age 7 Age 8+
#> 1 1991 23 39G2 NA 43.06 2.12 3.03 3.49 1.43 0.73 0.19 0.05 0.00
#> 2 1991 24 39G2 NA 122.19 27.29 34.37 70.16 31.02 12.15 8.29 0.54 4.71
#> 1+ Species abun_her IDr
#> 1 11.04 Herring 11.04 39G2.1991
#> 2 188.53 Herring 188.53 39G2.1991
# This should equal 1 (new # rows = old - duplicated IDr)
nrow(her_sum) / (nrow(her) - 0.5*nrow(her %>% group_by(IDr) %>% mutate(n = n()) %>% filter(n == 2)))
#> [1] 1
# How many rows per rectangle? (should be exactly one per IDr now)
her_sum %>%
group_by(IDr) %>%
mutate(n = n()) %>%
ungroup() %>%
distinct(n)
#> # A tibble: 1 x 1
#> n
#> <int>
#> 1 1
# Now join pelagic covariates
# Make ices_rect a factor and build the rectangle-year key matching the pelagics data
dat <- dat %>%
  mutate(ices_rect = as.factor(ices_rect),
         IDr = paste(ices_rect, year, sep = "."))
# Are there any StatRec that are in the condition data that are not in the pelagics data?
dat$ices_rect[!dat$ices_rect %in% her$StatRec]
#> factor(0)
#> 51 Levels: 37G2 37G3 37G4 37G5 37G6 37G8 37G9 38G2 38G3 38G4 38G5 38G6 ... 45H1
dat$ices_rect[!dat$ices_rect %in% spr$StatRec]
#> factor(0)
#> 51 Levels: 37G2 37G3 37G4 37G5 37G6 37G8 37G9 38G2 38G3 38G4 38G5 38G6 ... 45H1
# No, but not all IDr's are present
head(dat$IDr[!dat$IDr %in% her$IDr])
#> [1] "37G4.1993" "37G4.1993" "37G4.1993" "37G4.1993" "40G4.1993" "40G4.1993"
head(dat$IDr[!dat$IDr %in% spr$IDr])
#> [1] "37G4.1993" "37G4.1993" "37G4.1993" "37G4.1993" "40G4.1993" "40G4.1993"
# Filter rows so that I only use sprat and herring IDr's that are in the condition data (don't need the others!)
spr_sum <- spr_sum %>% filter(IDr %in% dat$IDr)
her_sum <- her_sum %>% filter(IDr %in% dat$IDr)
# Select columns from pelagic data to go in dat
spr_sub <- spr_sum %>% dplyr::select(IDr, abun_spr)
her_sub <- her_sum %>% dplyr::select(IDr, abun_her)
# Join sprat, then herring, onto the condition data.
# FIX: make the join key explicit rather than relying on the natural join,
# which silently uses whatever columns happen to be shared.
dat <- left_join(dat, spr_sub, by = "IDr")
nrow(dat)
#> [1] 97435
dat <- left_join(dat, her_sub, by = "IDr")
# Now deal with the NA's
# The pelagic summaries themselves contain no NA's...
unique(is.na(spr_sum$abun_spr))
#> [1] FALSE
unique(is.na(her_sum$abun_her))
#> [1] FALSE
# ...but the left joins introduced NA's where a rectangle-year had no pelagic data
unique(is.na(dat$abun_spr))
#> [1] FALSE TRUE
unique(is.na(dat$abun_her))
#> [1] FALSE TRUE
# Smallest non-missing abundances: genuine zeroes exist, so NA must not mean 0
dat %>% drop_na(abun_spr) %>% arrange(abun_spr) %>% dplyr::select(abun_spr)
#> # A tibble: 94,533 x 1
#> abun_spr
#> <dbl>
#> 1 0
#> 2 0
#> 3 0
#> 4 0
#> 5 0
#> 6 0
#> 7 0
#> 8 0
#> 9 0
#> 10 0
#> # … with 94,523 more rows
# Same check for herring abundances
dat %>% drop_na(abun_her) %>% dplyr::select(abun_her) %>% arrange(abun_her)
#> # A tibble: 94,533 x 1
#> abun_her
#> <dbl>
#> 1 0
#> 2 0
#> 3 0
#> 4 0
#> 5 0
#> 6 0
#> 7 0
#> 8 0
#> 9 0
#> 10 0
#> # … with 94,523 more rows
# The NA's I have in the DAT are missing pelagic data, i.e. not 0's! Need to drop them unfortunately,
# or fit a model to the data. Since it's only 3% of data, I will simply remove them.
dat <- dat %>% drop_na(abun_spr, abun_her)
# Inspect the final joined data set
head(data.frame(dat))
#> weight_g length_cm year lat lon quarter IDx
#> 1 1176 51 1993 54.7 14.1333 4 1993.4.GFR.06S1.H20.28.41
#> 2 1187 51 1993 54.7 14.1333 4 1993.4.GFR.06S1.H20.28.41
#> 3 1199 51 1993 54.7 14.1333 4 1993.4.GFR.06S1.H20.28.41
#> 4 1219 51 1993 54.7 14.1333 4 1993.4.GFR.06S1.H20.28.41
#> 5 1433 53 1993 54.7 14.1333 4 1993.4.GFR.06S1.H20.28.41
#> 6 1824 55 1993 54.7 14.1333 4 1993.4.GFR.06S1.H20.28.41
#> ices_rect SD depth oxy temp X Y temp_sc depth_sc
#> 1 38G4 24 18 7.320176 7.858522 444.1463 6061.753 -0.1839327 -1.032435
#> 2 38G4 24 18 7.320176 7.858522 444.1463 6061.753 -0.1839327 -1.032435
#> 3 38G4 24 18 7.320176 7.858522 444.1463 6061.753 -0.1839327 -1.032435
#> 4 38G4 24 18 7.320176 7.858522 444.1463 6061.753 -0.1839327 -1.032435
#> 5 38G4 24 18 7.320176 7.858522 444.1463 6061.753 -0.1839327 -1.032435
#> 6 38G4 24 18 7.320176 7.858522 444.1463 6061.753 -0.1839327 -1.032435
#> density_cod density_fle cond density_cod_data density_fle_data IDr
#> 1 690.7729 187.6693 0.8865369 698.3562 308.4765 38G4.1993
#> 2 690.7729 187.6693 0.8948293 698.3562 308.4765 38G4.1993
#> 3 690.7729 187.6693 0.9038756 698.3562 308.4765 38G4.1993
#> 4 690.7729 187.6693 0.9189527 698.3562 308.4765 38G4.1993
#> 5 690.7729 187.6693 0.9625395 698.3562 308.4765 38G4.1993
#> 6 690.7729 187.6693 1.0963186 698.3562 308.4765 38G4.1993
#> abun_spr abun_her
#> 1 122.39 331.09
#> 2 122.39 331.09
#> 3 122.39 331.09
#> 4 122.39 331.09
#> 5 122.39 331.09
#> 6 122.39 331.09
# Trim data a little bit to not make it unnecessarily large.
# FIX: the original select(-IDx, quarter, cond, IDr) mixed one negated column
# with bare positive names; with tidyselect only IDx is actually dropped (the
# bare names are already included). Made that effective behavior explicit.
# If the intent was to also drop quarter/cond/IDr, note that cond is the
# response for the condition model — TODO confirm which columns are needed.
dat <- dat %>% dplyr::select(-IDx)
write.csv(dat, file = "data/for_analysis/mdat_cond.csv", row.names = FALSE)